xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (revision 8311bc5f17dec348749f763b82dfe2737bc53cd7)
1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISC-V target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMCTargetDesc.h"
16 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVISelLowering.h"
18 #include "RISCVMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/IR/IntrinsicsRISCV.h"
21 #include "llvm/Support/Alignment.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <optional>
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "riscv-isel"
30 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31 
32 namespace llvm::RISCV {
33 #define GET_RISCVVSSEGTable_IMPL
34 #define GET_RISCVVLSEGTable_IMPL
35 #define GET_RISCVVLXSEGTable_IMPL
36 #define GET_RISCVVSXSEGTable_IMPL
37 #define GET_RISCVVLETable_IMPL
38 #define GET_RISCVVSETable_IMPL
39 #define GET_RISCVVLXTable_IMPL
40 #define GET_RISCVVSXTable_IMPL
41 #define GET_RISCVMaskedPseudosTable_IMPL
42 #include "RISCVGenSearchableTables.inc"
43 } // namespace llvm::RISCV
44 
45 void RISCVDAGToDAGISel::PreprocessISelDAG() {
46   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
47 
48   bool MadeChange = false;
49   while (Position != CurDAG->allnodes_begin()) {
50     SDNode *N = &*--Position;
51     if (N->use_empty())
52       continue;
53 
54     SDValue Result;
55     switch (N->getOpcode()) {
56     case ISD::SPLAT_VECTOR: {
57       // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
58       // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
59       MVT VT = N->getSimpleValueType(0);
60       unsigned Opc =
61           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
62       SDLoc DL(N);
63       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
64       Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
65                                N->getOperand(0), VL);
66       break;
67     }
68     case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
69       // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
70       // load. Done after lowering and combining so that we have a chance to
71       // optimize this to VMV_V_X_VL when the upper bits aren't needed.
72       assert(N->getNumOperands() == 4 && "Unexpected number of operands");
73       MVT VT = N->getSimpleValueType(0);
74       SDValue Passthru = N->getOperand(0);
75       SDValue Lo = N->getOperand(1);
76       SDValue Hi = N->getOperand(2);
77       SDValue VL = N->getOperand(3);
78       assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
79              Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
80              "Unexpected VTs!");
81       MachineFunction &MF = CurDAG->getMachineFunction();
82       SDLoc DL(N);
83 
84       // Create temporary stack for each expanding node.
85       SDValue StackSlot =
86           CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
87       int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
88       MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
89 
90       SDValue Chain = CurDAG->getEntryNode();
91       Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
92 
93       SDValue OffsetSlot =
94           CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
95       Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
96                             Align(8));
97 
98       Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
99 
100       SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
101       SDValue IntID =
102           CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
103       SDValue Ops[] = {Chain,
104                        IntID,
105                        Passthru,
106                        StackSlot,
107                        CurDAG->getRegister(RISCV::X0, MVT::i64),
108                        VL};
109 
110       Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
111                                            MVT::i64, MPI, Align(8),
112                                            MachineMemOperand::MOLoad);
113       break;
114     }
115     }
116 
117     if (Result) {
118       LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld:    ");
119       LLVM_DEBUG(N->dump(CurDAG));
120       LLVM_DEBUG(dbgs() << "\nNew: ");
121       LLVM_DEBUG(Result->dump(CurDAG));
122       LLVM_DEBUG(dbgs() << "\n");
123 
124       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
125       MadeChange = true;
126     }
127   }
128 
129   if (MadeChange)
130     CurDAG->RemoveDeadNodes();
131 }
132 
133 void RISCVDAGToDAGISel::PostprocessISelDAG() {
134   HandleSDNode Dummy(CurDAG->getRoot());
135   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
136 
137   bool MadeChange = false;
138   while (Position != CurDAG->allnodes_begin()) {
139     SDNode *N = &*--Position;
140     // Skip dead nodes and any non-machine opcodes.
141     if (N->use_empty() || !N->isMachineOpcode())
142       continue;
143 
144     MadeChange |= doPeepholeSExtW(N);
145     MadeChange |= doPeepholeMaskedRVV(N);
146   }
147 
148   CurDAG->setRoot(Dummy.getValue());
149 
150   MadeChange |= doPeepholeMergeVVMFold();
151 
152   if (MadeChange)
153     CurDAG->RemoveDeadNodes();
154 }
155 
156 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
157                             RISCVMatInt::InstSeq &Seq) {
158   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
159   for (const RISCVMatInt::Inst &Inst : Seq) {
160     SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
161     SDNode *Result = nullptr;
162     switch (Inst.getOpndKind()) {
163     case RISCVMatInt::Imm:
164       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
165       break;
166     case RISCVMatInt::RegX0:
167       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
168                                       CurDAG->getRegister(RISCV::X0, VT));
169       break;
170     case RISCVMatInt::RegReg:
171       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
172       break;
173     case RISCVMatInt::RegImm:
174       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
175       break;
176     }
177 
178     // Only the first instruction has X0 as its source.
179     SrcReg = SDValue(Result, 0);
180   }
181 
182   return SrcReg;
183 }
184 
185 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
186                          int64_t Imm, const RISCVSubtarget &Subtarget) {
187   RISCVMatInt::InstSeq Seq =
188       RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
189 
190   // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
191   // worst an LUI+ADDIW. This will require an extra register, but avoids a
192   // constant pool.
193   if (Seq.size() > 3) {
194     int64_t LoVal = SignExtend64<32>(Imm);
195     int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
196     if (LoVal == HiVal) {
197       RISCVMatInt::InstSeq SeqLo =
198           RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
199       if ((SeqLo.size() + 2) < Seq.size()) {
200         SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
201 
202         SDValue SLLI = SDValue(
203             CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
204                                    CurDAG->getTargetConstant(32, DL, VT)),
205             0);
206         return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
207                        0);
208       }
209     }
210   }
211 
212   // Otherwise, use the original sequence.
213   return selectImmSeq(CurDAG, DL, VT, Seq);
214 }
215 
216 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
217                            unsigned NF, RISCVII::VLMUL LMUL) {
218   static const unsigned M1TupleRegClassIDs[] = {
219       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
220       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
221       RISCV::VRN8M1RegClassID};
222   static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
223                                                 RISCV::VRN3M2RegClassID,
224                                                 RISCV::VRN4M2RegClassID};
225 
226   assert(Regs.size() >= 2 && Regs.size() <= 8);
227 
228   unsigned RegClassID;
229   unsigned SubReg0;
230   switch (LMUL) {
231   default:
232     llvm_unreachable("Invalid LMUL.");
233   case RISCVII::VLMUL::LMUL_F8:
234   case RISCVII::VLMUL::LMUL_F4:
235   case RISCVII::VLMUL::LMUL_F2:
236   case RISCVII::VLMUL::LMUL_1:
237     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
238                   "Unexpected subreg numbering");
239     SubReg0 = RISCV::sub_vrm1_0;
240     RegClassID = M1TupleRegClassIDs[NF - 2];
241     break;
242   case RISCVII::VLMUL::LMUL_2:
243     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
244                   "Unexpected subreg numbering");
245     SubReg0 = RISCV::sub_vrm2_0;
246     RegClassID = M2TupleRegClassIDs[NF - 2];
247     break;
248   case RISCVII::VLMUL::LMUL_4:
249     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
250                   "Unexpected subreg numbering");
251     SubReg0 = RISCV::sub_vrm4_0;
252     RegClassID = RISCV::VRN2M4RegClassID;
253     break;
254   }
255 
256   SDLoc DL(Regs[0]);
257   SmallVector<SDValue, 8> Ops;
258 
259   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
260 
261   for (unsigned I = 0; I < Regs.size(); ++I) {
262     Ops.push_back(Regs[I]);
263     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
264   }
265   SDNode *N =
266       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
267   return SDValue(N, 0);
268 }
269 
270 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
271     SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
272     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
273     bool IsLoad, MVT *IndexVT) {
274   SDValue Chain = Node->getOperand(0);
275   SDValue Glue;
276 
277   Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
278 
279   if (IsStridedOrIndexed) {
280     Operands.push_back(Node->getOperand(CurOp++)); // Index.
281     if (IndexVT)
282       *IndexVT = Operands.back()->getSimpleValueType(0);
283   }
284 
285   if (IsMasked) {
286     // Mask needs to be copied to V0.
287     SDValue Mask = Node->getOperand(CurOp++);
288     Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
289     Glue = Chain.getValue(1);
290     Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
291   }
292   SDValue VL;
293   selectVLOp(Node->getOperand(CurOp++), VL);
294   Operands.push_back(VL);
295 
296   MVT XLenVT = Subtarget->getXLenVT();
297   SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
298   Operands.push_back(SEWOp);
299 
300   // At the IR layer, all the masked load intrinsics have policy operands,
301   // none of the others do.  All have passthru operands.  For our pseudos,
302   // all loads have policy operands.
303   if (IsLoad) {
304     uint64_t Policy = RISCVII::MASK_AGNOSTIC;
305     if (IsMasked)
306       Policy = Node->getConstantOperandVal(CurOp++);
307     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
308     Operands.push_back(PolicyOp);
309   }
310 
311   Operands.push_back(Chain); // Chain.
312   if (Glue)
313     Operands.push_back(Glue);
314 }
315 
316 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
317                                     bool IsStrided) {
318   SDLoc DL(Node);
319   unsigned NF = Node->getNumValues() - 1;
320   MVT VT = Node->getSimpleValueType(0);
321   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
322   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
323 
324   unsigned CurOp = 2;
325   SmallVector<SDValue, 8> Operands;
326 
327   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
328                                Node->op_begin() + CurOp + NF);
329   SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
330   Operands.push_back(Merge);
331   CurOp += NF;
332 
333   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
334                              Operands, /*IsLoad=*/true);
335 
336   const RISCV::VLSEGPseudo *P =
337       RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
338                             static_cast<unsigned>(LMUL));
339   MachineSDNode *Load =
340       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
341 
342   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
343     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
344 
345   SDValue SuperReg = SDValue(Load, 0);
346   for (unsigned I = 0; I < NF; ++I) {
347     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
348     ReplaceUses(SDValue(Node, I),
349                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
350   }
351 
352   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
353   CurDAG->RemoveDeadNode(Node);
354 }
355 
356 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
357   SDLoc DL(Node);
358   unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
359   MVT VT = Node->getSimpleValueType(0);
360   MVT XLenVT = Subtarget->getXLenVT();
361   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
362   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
363 
364   unsigned CurOp = 2;
365   SmallVector<SDValue, 7> Operands;
366 
367   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
368                                Node->op_begin() + CurOp + NF);
369   SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
370   Operands.push_back(MaskedOff);
371   CurOp += NF;
372 
373   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
374                              /*IsStridedOrIndexed*/ false, Operands,
375                              /*IsLoad=*/true);
376 
377   const RISCV::VLSEGPseudo *P =
378       RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
379                             Log2SEW, static_cast<unsigned>(LMUL));
380   MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
381                                                XLenVT, MVT::Other, Operands);
382 
383   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
384     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
385 
386   SDValue SuperReg = SDValue(Load, 0);
387   for (unsigned I = 0; I < NF; ++I) {
388     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
389     ReplaceUses(SDValue(Node, I),
390                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
391   }
392 
393   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
394   ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
395   CurDAG->RemoveDeadNode(Node);
396 }
397 
398 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
399                                      bool IsOrdered) {
400   SDLoc DL(Node);
401   unsigned NF = Node->getNumValues() - 1;
402   MVT VT = Node->getSimpleValueType(0);
403   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
404   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
405 
406   unsigned CurOp = 2;
407   SmallVector<SDValue, 8> Operands;
408 
409   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
410                                Node->op_begin() + CurOp + NF);
411   SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
412   Operands.push_back(MaskedOff);
413   CurOp += NF;
414 
415   MVT IndexVT;
416   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
417                              /*IsStridedOrIndexed*/ true, Operands,
418                              /*IsLoad=*/true, &IndexVT);
419 
420   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
421          "Element count mismatch");
422 
423   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
424   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
425   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
426     report_fatal_error("The V extension does not support EEW=64 for index "
427                        "values when XLEN=32");
428   }
429   const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
430       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
431       static_cast<unsigned>(IndexLMUL));
432   MachineSDNode *Load =
433       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
434 
435   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
436     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
437 
438   SDValue SuperReg = SDValue(Load, 0);
439   for (unsigned I = 0; I < NF; ++I) {
440     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
441     ReplaceUses(SDValue(Node, I),
442                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
443   }
444 
445   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
446   CurDAG->RemoveDeadNode(Node);
447 }
448 
449 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
450                                     bool IsStrided) {
451   SDLoc DL(Node);
452   unsigned NF = Node->getNumOperands() - 4;
453   if (IsStrided)
454     NF--;
455   if (IsMasked)
456     NF--;
457   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
458   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
459   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
460   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
461   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
462 
463   SmallVector<SDValue, 8> Operands;
464   Operands.push_back(StoreVal);
465   unsigned CurOp = 2 + NF;
466 
467   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
468                              Operands);
469 
470   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
471       NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
472   MachineSDNode *Store =
473       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
474 
475   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
476     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
477 
478   ReplaceNode(Node, Store);
479 }
480 
481 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
482                                      bool IsOrdered) {
483   SDLoc DL(Node);
484   unsigned NF = Node->getNumOperands() - 5;
485   if (IsMasked)
486     --NF;
487   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
488   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
489   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
490   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
491   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
492 
493   SmallVector<SDValue, 8> Operands;
494   Operands.push_back(StoreVal);
495   unsigned CurOp = 2 + NF;
496 
497   MVT IndexVT;
498   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
499                              /*IsStridedOrIndexed*/ true, Operands,
500                              /*IsLoad=*/false, &IndexVT);
501 
502   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
503          "Element count mismatch");
504 
505   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
506   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
507   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
508     report_fatal_error("The V extension does not support EEW=64 for index "
509                        "values when XLEN=32");
510   }
511   const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
512       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
513       static_cast<unsigned>(IndexLMUL));
514   MachineSDNode *Store =
515       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
516 
517   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
518     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
519 
520   ReplaceNode(Node, Store);
521 }
522 
523 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
524   if (!Subtarget->hasVInstructions())
525     return;
526 
527   assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
528 
529   SDLoc DL(Node);
530   MVT XLenVT = Subtarget->getXLenVT();
531 
532   unsigned IntNo = Node->getConstantOperandVal(0);
533 
534   assert((IntNo == Intrinsic::riscv_vsetvli ||
535           IntNo == Intrinsic::riscv_vsetvlimax) &&
536          "Unexpected vsetvli intrinsic");
537 
538   bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
539   unsigned Offset = (VLMax ? 1 : 2);
540 
541   assert(Node->getNumOperands() == Offset + 2 &&
542          "Unexpected number of operands");
543 
544   unsigned SEW =
545       RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
546   RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
547       Node->getConstantOperandVal(Offset + 1) & 0x7);
548 
549   unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
550                                             /*MaskAgnostic*/ true);
551   SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
552 
553   SDValue VLOperand;
554   unsigned Opcode = RISCV::PseudoVSETVLI;
555   if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
556     VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
557     Opcode = RISCV::PseudoVSETVLIX0;
558   } else {
559     VLOperand = Node->getOperand(1);
560 
561     if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
562       uint64_t AVL = C->getZExtValue();
563       if (isUInt<5>(AVL)) {
564         SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
565         ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
566                                                  XLenVT, VLImm, VTypeIOp));
567         return;
568       }
569     }
570   }
571 
572   ReplaceNode(Node,
573               CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
574 }
575 
576 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
577   MVT VT = Node->getSimpleValueType(0);
578   unsigned Opcode = Node->getOpcode();
579   assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
580          "Unexpected opcode");
581   SDLoc DL(Node);
582 
583   // For operations of the form (x << C1) op C2, check if we can use
584   // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
585   SDValue N0 = Node->getOperand(0);
586   SDValue N1 = Node->getOperand(1);
587 
588   ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
589   if (!Cst)
590     return false;
591 
592   int64_t Val = Cst->getSExtValue();
593 
594   // Check if immediate can already use ANDI/ORI/XORI.
595   if (isInt<12>(Val))
596     return false;
597 
598   SDValue Shift = N0;
599 
600   // If Val is simm32 and we have a sext_inreg from i32, then the binop
601   // produces at least 33 sign bits. We can peek through the sext_inreg and use
602   // a SLLIW at the end.
603   bool SignExt = false;
604   if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
605       N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
606     SignExt = true;
607     Shift = N0.getOperand(0);
608   }
609 
610   if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
611     return false;
612 
613   ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
614   if (!ShlCst)
615     return false;
616 
617   uint64_t ShAmt = ShlCst->getZExtValue();
618 
619   // Make sure that we don't change the operation by removing bits.
620   // This only matters for OR and XOR, AND is unaffected.
621   uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
622   if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
623     return false;
624 
625   int64_t ShiftedVal = Val >> ShAmt;
626   if (!isInt<12>(ShiftedVal))
627     return false;
628 
629   // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
630   if (SignExt && ShAmt >= 32)
631     return false;
632 
633   // Ok, we can reorder to get a smaller immediate.
634   unsigned BinOpc;
635   switch (Opcode) {
636   default: llvm_unreachable("Unexpected opcode");
637   case ISD::AND: BinOpc = RISCV::ANDI; break;
638   case ISD::OR:  BinOpc = RISCV::ORI;  break;
639   case ISD::XOR: BinOpc = RISCV::XORI; break;
640   }
641 
642   unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
643 
644   SDNode *BinOp =
645       CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
646                              CurDAG->getTargetConstant(ShiftedVal, DL, VT));
647   SDNode *SLLI =
648       CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
649                              CurDAG->getTargetConstant(ShAmt, DL, VT));
650   ReplaceNode(Node, SLLI);
651   return true;
652 }
653 
654 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
655   // Only supported with XTHeadBb at the moment.
656   if (!Subtarget->hasVendorXTHeadBb())
657     return false;
658 
659   auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
660   if (!N1C)
661     return false;
662 
663   SDValue N0 = Node->getOperand(0);
664   if (!N0.hasOneUse())
665     return false;
666 
667   auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
668                              MVT VT) {
669     return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
670                                   CurDAG->getTargetConstant(Msb, DL, VT),
671                                   CurDAG->getTargetConstant(Lsb, DL, VT));
672   };
673 
674   SDLoc DL(Node);
675   MVT VT = Node->getSimpleValueType(0);
676   const unsigned RightShAmt = N1C->getZExtValue();
677 
678   // Transform (sra (shl X, C1) C2) with C1 < C2
679   //        -> (TH.EXT X, msb, lsb)
680   if (N0.getOpcode() == ISD::SHL) {
681     auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
682     if (!N01C)
683       return false;
684 
685     const unsigned LeftShAmt = N01C->getZExtValue();
686     // Make sure that this is a bitfield extraction (i.e., the shift-right
687     // amount can not be less than the left-shift).
688     if (LeftShAmt > RightShAmt)
689       return false;
690 
691     const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
692     const unsigned Msb = MsbPlusOne - 1;
693     const unsigned Lsb = RightShAmt - LeftShAmt;
694 
695     SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
696     ReplaceNode(Node, TH_EXT);
697     return true;
698   }
699 
700   // Transform (sra (sext_inreg X, _), C) ->
701   //           (TH.EXT X, msb, lsb)
702   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
703     unsigned ExtSize =
704         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
705 
706     // ExtSize of 32 should use sraiw via tablegen pattern.
707     if (ExtSize == 32)
708       return false;
709 
710     const unsigned Msb = ExtSize - 1;
711     const unsigned Lsb = RightShAmt;
712 
713     SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
714     ReplaceNode(Node, TH_EXT);
715     return true;
716   }
717 
718   return false;
719 }
720 
721 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
722   // Target does not support indexed loads.
723   if (!Subtarget->hasVendorXTHeadMemIdx())
724     return false;
725 
726   LoadSDNode *Ld = cast<LoadSDNode>(Node);
727   ISD::MemIndexedMode AM = Ld->getAddressingMode();
728   if (AM == ISD::UNINDEXED)
729     return false;
730 
731   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
732   if (!C)
733     return false;
734 
735   EVT LoadVT = Ld->getMemoryVT();
736   bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
737   bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
738   int64_t Offset = C->getSExtValue();
739 
740   // Convert decrements to increments by a negative quantity.
741   if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
742     Offset = -Offset;
743 
744   // The constants that can be encoded in the THeadMemIdx instructions
745   // are of the form (sign_extend(imm5) << imm2).
746   int64_t Shift;
747   for (Shift = 0; Shift < 4; Shift++)
748     if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
749       break;
750 
751   // Constant cannot be encoded.
752   if (Shift == 4)
753     return false;
754 
755   bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
756   unsigned Opcode;
757   if (LoadVT == MVT::i8 && IsPre)
758     Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
759   else if (LoadVT == MVT::i8 && IsPost)
760     Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
761   else if (LoadVT == MVT::i16 && IsPre)
762     Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
763   else if (LoadVT == MVT::i16 && IsPost)
764     Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
765   else if (LoadVT == MVT::i32 && IsPre)
766     Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
767   else if (LoadVT == MVT::i32 && IsPost)
768     Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
769   else if (LoadVT == MVT::i64 && IsPre)
770     Opcode = RISCV::TH_LDIB;
771   else if (LoadVT == MVT::i64 && IsPost)
772     Opcode = RISCV::TH_LDIA;
773   else
774     return false;
775 
776   EVT Ty = Ld->getOffset().getValueType();
777   SDValue Ops[] = {Ld->getBasePtr(),
778                    CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
779                    CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
780                    Ld->getChain()};
781   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
782                                        Ld->getValueType(1), MVT::Other, Ops);
783 
784   MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
785   CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
786 
787   ReplaceNode(Node, New);
788 
789   return true;
790 }
791 
792 void RISCVDAGToDAGISel::Select(SDNode *Node) {
793   // If we have a custom node, we have already selected.
794   if (Node->isMachineOpcode()) {
795     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
796     Node->setNodeId(-1);
797     return;
798   }
799 
800   // Instruction Selection not handled by the auto-generated tablegen selection
801   // should be handled here.
802   unsigned Opcode = Node->getOpcode();
803   MVT XLenVT = Subtarget->getXLenVT();
804   SDLoc DL(Node);
805   MVT VT = Node->getSimpleValueType(0);
806 
807   bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
808 
809   switch (Opcode) {
810   case ISD::Constant: {
811     assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
812     auto *ConstNode = cast<ConstantSDNode>(Node);
813     if (ConstNode->isZero()) {
814       SDValue New =
815           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
816       ReplaceNode(Node, New.getNode());
817       return;
818     }
819     int64_t Imm = ConstNode->getSExtValue();
820     // If the upper XLen-16 bits are not used, try to convert this to a simm12
821     // by sign extending bit 15.
822     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
823         hasAllHUsers(Node))
824       Imm = SignExtend64<16>(Imm);
825     // If the upper 32-bits are not used try to convert this into a simm32 by
826     // sign extending bit 32.
827     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
828       Imm = SignExtend64<32>(Imm);
829 
830     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
831     return;
832   }
833   case ISD::ConstantFP: {
834     const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
835     int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
836         APF, VT);
837     if (FPImm >= 0) {
838       unsigned Opc;
839       switch (VT.SimpleTy) {
840       default:
841         llvm_unreachable("Unexpected size");
842       case MVT::f16:
843         Opc = RISCV::FLI_H;
844         break;
845       case MVT::f32:
846         Opc = RISCV::FLI_S;
847         break;
848       case MVT::f64:
849         Opc = RISCV::FLI_D;
850         break;
851       }
852 
853       SDNode *Res = CurDAG->getMachineNode(
854           Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
855       ReplaceNode(Node, Res);
856       return;
857     }
858 
859     bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
860     SDValue Imm;
861     // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
862     // create an integer immediate.
863     if (APF.isPosZero() || NegZeroF64)
864       Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
865     else
866       Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
867                       *Subtarget);
868 
869     unsigned Opc;
870     switch (VT.SimpleTy) {
871     default:
872       llvm_unreachable("Unexpected size");
873     case MVT::f16:
874       Opc =
875           Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
876       break;
877     case MVT::f32:
878       Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
879       break;
880     case MVT::f64:
881       // For RV32, we can't move from a GPR, we need to convert instead. This
882       // should only happen for +0.0 and -0.0.
883       assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
884       bool HasZdinx = Subtarget->hasStdExtZdinx();
885       if (Subtarget->is64Bit())
886         Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
887       else
888         Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
889       break;
890     }
891 
892     SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
893 
894     // For f64 -0.0, we need to insert a fneg.d idiom.
895     if (NegZeroF64)
896       Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
897                                    SDValue(Res, 0));
898 
899     ReplaceNode(Node, Res);
900     return;
901   }
902   case RISCVISD::SplitF64: {
903     if (!Subtarget->hasStdExtZfa())
904       break;
905     assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
906            "Unexpected subtarget");
907 
908     // With Zfa, lower to fmv.x.w and fmvh.x.d.
909     if (!SDValue(Node, 0).use_empty()) {
910       SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
911                                           Node->getOperand(0));
912       ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
913     }
914     if (!SDValue(Node, 1).use_empty()) {
915       SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
916                                           Node->getOperand(0));
917       ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
918     }
919 
920     CurDAG->RemoveDeadNode(Node);
921     return;
922   }
923   case ISD::SHL: {
924     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
925     if (!N1C)
926       break;
927     SDValue N0 = Node->getOperand(0);
928     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
929         !isa<ConstantSDNode>(N0.getOperand(1)))
930       break;
931     unsigned ShAmt = N1C->getZExtValue();
932     uint64_t Mask = N0.getConstantOperandVal(1);
933 
934     // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
935     // 32 leading zeros and C3 trailing zeros.
936     if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
937       unsigned XLen = Subtarget->getXLen();
938       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
939       unsigned TrailingZeros = llvm::countr_zero(Mask);
940       if (TrailingZeros > 0 && LeadingZeros == 32) {
941         SDNode *SRLIW = CurDAG->getMachineNode(
942             RISCV::SRLIW, DL, VT, N0->getOperand(0),
943             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
944         SDNode *SLLI = CurDAG->getMachineNode(
945             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
946             CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
947         ReplaceNode(Node, SLLI);
948         return;
949       }
950     }
951     break;
952   }
953   case ISD::SRL: {
954     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
955     if (!N1C)
956       break;
957     SDValue N0 = Node->getOperand(0);
958     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
959       break;
960     unsigned ShAmt = N1C->getZExtValue();
961     uint64_t Mask = N0.getConstantOperandVal(1);
962 
963     // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
964     // 32 leading zeros and C3 trailing zeros.
965     if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
966       unsigned XLen = Subtarget->getXLen();
967       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
968       unsigned TrailingZeros = llvm::countr_zero(Mask);
969       if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
970         SDNode *SRLIW = CurDAG->getMachineNode(
971             RISCV::SRLIW, DL, VT, N0->getOperand(0),
972             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
973         SDNode *SLLI = CurDAG->getMachineNode(
974             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
975             CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
976         ReplaceNode(Node, SLLI);
977         return;
978       }
979     }
980 
981     // Optimize (srl (and X, C2), C) ->
982     //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
983     // Where C2 is a mask with C3 trailing ones.
984     // Taking into account that the C2 may have had lower bits unset by
985     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
986     // This pattern occurs when type legalizing right shifts for types with
987     // less than XLen bits.
988     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
989     if (!isMask_64(Mask))
990       break;
991     unsigned TrailingOnes = llvm::countr_one(Mask);
992     if (ShAmt >= TrailingOnes)
993       break;
994     // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
995     if (TrailingOnes == 32) {
996       SDNode *SRLI = CurDAG->getMachineNode(
997           Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
998           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
999       ReplaceNode(Node, SRLI);
1000       return;
1001     }
1002 
1003     // Only do the remaining transforms if the AND has one use.
1004     if (!N0.hasOneUse())
1005       break;
1006 
1007     // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1008     if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1009       SDNode *BEXTI = CurDAG->getMachineNode(
1010           Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1011           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1012       ReplaceNode(Node, BEXTI);
1013       return;
1014     }
1015 
1016     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1017     SDNode *SLLI =
1018         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1019                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1020     SDNode *SRLI = CurDAG->getMachineNode(
1021         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1022         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1023     ReplaceNode(Node, SRLI);
1024     return;
1025   }
1026   case ISD::SRA: {
1027     if (trySignedBitfieldExtract(Node))
1028       return;
1029 
1030     // Optimize (sra (sext_inreg X, i16), C) ->
1031     //          (srai (slli X, (XLen-16)), (XLen-16) + C)
1032     // And      (sra (sext_inreg X, i8), C) ->
1033     //          (srai (slli X, (XLen-8)), (XLen-8) + C)
1034     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1035     // This transform matches the code we get without Zbb. The shifts are more
1036     // compressible, and this can help expose CSE opportunities in the sdiv by
1037     // constant optimization.
1038     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1039     if (!N1C)
1040       break;
1041     SDValue N0 = Node->getOperand(0);
1042     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1043       break;
1044     unsigned ShAmt = N1C->getZExtValue();
1045     unsigned ExtSize =
1046         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1047     // ExtSize of 32 should use sraiw via tablegen pattern.
1048     if (ExtSize >= 32 || ShAmt >= ExtSize)
1049       break;
1050     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1051     SDNode *SLLI =
1052         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1053                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1054     SDNode *SRAI = CurDAG->getMachineNode(
1055         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1056         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1057     ReplaceNode(Node, SRAI);
1058     return;
1059   }
1060   case ISD::OR:
1061   case ISD::XOR:
1062     if (tryShrinkShlLogicImm(Node))
1063       return;
1064 
1065     break;
1066   case ISD::AND: {
1067     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1068     if (!N1C)
1069       break;
1070     uint64_t C1 = N1C->getZExtValue();
1071     const bool isC1Mask = isMask_64(C1);
1072     const bool isC1ANDI = isInt<12>(C1);
1073 
1074     SDValue N0 = Node->getOperand(0);
1075 
1076     auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1077                                           SDValue X, unsigned Msb,
1078                                           unsigned Lsb) {
1079       if (!Subtarget->hasVendorXTHeadBb())
1080         return false;
1081 
1082       SDNode *TH_EXTU = CurDAG->getMachineNode(
1083           RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1084           CurDAG->getTargetConstant(Lsb, DL, VT));
1085       ReplaceNode(Node, TH_EXTU);
1086       return true;
1087     };
1088 
1089     bool LeftShift = N0.getOpcode() == ISD::SHL;
1090     if (LeftShift || N0.getOpcode() == ISD::SRL) {
1091       auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1092       if (!C)
1093         break;
1094       unsigned C2 = C->getZExtValue();
1095       unsigned XLen = Subtarget->getXLen();
1096       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1097 
1098       // Keep track of whether this is a c.andi. If we can't use c.andi, the
1099       // shift pair might offer more compression opportunities.
1100       // TODO: We could check for C extension here, but we don't have many lit
1101       // tests with the C extension enabled so not checking gets better
1102       // coverage.
1103       // TODO: What if ANDI is faster than shift?
1104       bool IsCANDI = isInt<6>(N1C->getSExtValue());
1105 
1106       // Clear irrelevant bits in the mask.
1107       if (LeftShift)
1108         C1 &= maskTrailingZeros<uint64_t>(C2);
1109       else
1110         C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1111 
1112       // Some transforms should only be done if the shift has a single use or
1113       // the AND would become (srli (slli X, 32), 32)
1114       bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1115 
1116       SDValue X = N0.getOperand(0);
1117 
1118       // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1119       // with c3 leading zeros.
1120       if (!LeftShift && isC1Mask) {
1121         unsigned Leading = XLen - llvm::bit_width(C1);
1122         if (C2 < Leading) {
1123           // If the number of leading zeros is C2+32 this can be SRLIW.
1124           if (C2 + 32 == Leading) {
1125             SDNode *SRLIW = CurDAG->getMachineNode(
1126                 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1127             ReplaceNode(Node, SRLIW);
1128             return;
1129           }
1130 
1131           // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1132           // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1133           //
1134           // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1135           // legalized and goes through DAG combine.
1136           if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1137               X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1138               cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1139             SDNode *SRAIW =
1140                 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1141                                        CurDAG->getTargetConstant(31, DL, VT));
1142             SDNode *SRLIW = CurDAG->getMachineNode(
1143                 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1144                 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1145             ReplaceNode(Node, SRLIW);
1146             return;
1147           }
1148 
1149           // Try to use an unsigned bitfield extract (e.g., th.extu) if
1150           // available.
1151           // Transform (and (srl x, C2), C1)
1152           //        -> (<bfextract> x, msb, lsb)
1153           //
1154           // Make sure to keep this below the SRLIW cases, as we always want to
1155           // prefer the more common instruction.
1156           const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1157           const unsigned Lsb = C2;
1158           if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1159             return;
1160 
1161           // (srli (slli x, c3-c2), c3).
1162           // Skip if we could use (zext.w (sraiw X, C2)).
1163           bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1164                       X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1165                       cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1166           // Also Skip if we can use bexti or th.tst.
1167           Skip |= HasBitTest && Leading == XLen - 1;
1168           if (OneUseOrZExtW && !Skip) {
1169             SDNode *SLLI = CurDAG->getMachineNode(
1170                 RISCV::SLLI, DL, VT, X,
1171                 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1172             SDNode *SRLI = CurDAG->getMachineNode(
1173                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1174                 CurDAG->getTargetConstant(Leading, DL, VT));
1175             ReplaceNode(Node, SRLI);
1176             return;
1177           }
1178         }
1179       }
1180 
1181       // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
1182       // mask shifted by c2 bits with c3 leading zeros.
1183       if (LeftShift && isShiftedMask_64(C1)) {
1184         unsigned Leading = XLen - llvm::bit_width(C1);
1185 
1186         if (C2 + Leading < XLen &&
1187             C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1188           // Use slli.uw when possible.
1189           if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1190             SDNode *SLLI_UW =
1191                 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1192                                        CurDAG->getTargetConstant(C2, DL, VT));
1193             ReplaceNode(Node, SLLI_UW);
1194             return;
1195           }
1196 
1197           // (srli (slli x, c2+c3), c3)
1198           if (OneUseOrZExtW && !IsCANDI) {
1199             SDNode *SLLI = CurDAG->getMachineNode(
1200                 RISCV::SLLI, DL, VT, X,
1201                 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1202             SDNode *SRLI = CurDAG->getMachineNode(
1203                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1204                 CurDAG->getTargetConstant(Leading, DL, VT));
1205             ReplaceNode(Node, SRLI);
1206             return;
1207           }
1208         }
1209       }
1210 
1211       // Turn (and (srl x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1212       // shifted mask with c2 leading zeros and c3 trailing zeros.
1213       if (!LeftShift && isShiftedMask_64(C1)) {
1214         unsigned Leading = XLen - llvm::bit_width(C1);
1215         unsigned Trailing = llvm::countr_zero(C1);
1216         if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1217             !IsCANDI) {
1218           unsigned SrliOpc = RISCV::SRLI;
1219           // If the input is zexti32 we should use SRLIW.
1220           if (X.getOpcode() == ISD::AND &&
1221               isa<ConstantSDNode>(X.getOperand(1)) &&
1222               X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1223             SrliOpc = RISCV::SRLIW;
1224             X = X.getOperand(0);
1225           }
1226           SDNode *SRLI = CurDAG->getMachineNode(
1227               SrliOpc, DL, VT, X,
1228               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1229           SDNode *SLLI = CurDAG->getMachineNode(
1230               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1231               CurDAG->getTargetConstant(Trailing, DL, VT));
1232           ReplaceNode(Node, SLLI);
1233           return;
1234         }
1235         // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1236         if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1237             OneUseOrZExtW && !IsCANDI) {
1238           SDNode *SRLIW = CurDAG->getMachineNode(
1239               RISCV::SRLIW, DL, VT, X,
1240               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1241           SDNode *SLLI = CurDAG->getMachineNode(
1242               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1243               CurDAG->getTargetConstant(Trailing, DL, VT));
1244           ReplaceNode(Node, SLLI);
1245           return;
1246         }
1247       }
1248 
1249       // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1250       // shifted mask with no leading zeros and c3 trailing zeros.
1251       if (LeftShift && isShiftedMask_64(C1)) {
1252         unsigned Leading = XLen - llvm::bit_width(C1);
1253         unsigned Trailing = llvm::countr_zero(C1);
1254         if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1255           SDNode *SRLI = CurDAG->getMachineNode(
1256               RISCV::SRLI, DL, VT, X,
1257               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1258           SDNode *SLLI = CurDAG->getMachineNode(
1259               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1260               CurDAG->getTargetConstant(Trailing, DL, VT));
1261           ReplaceNode(Node, SLLI);
1262           return;
1263         }
1264         // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1265         if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1266           SDNode *SRLIW = CurDAG->getMachineNode(
1267               RISCV::SRLIW, DL, VT, X,
1268               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1269           SDNode *SLLI = CurDAG->getMachineNode(
1270               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1271               CurDAG->getTargetConstant(Trailing, DL, VT));
1272           ReplaceNode(Node, SLLI);
1273           return;
1274         }
1275       }
1276     }
1277 
1278     // If C1 masks off the upper bits only (but can't be formed as an
1279     // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1280     // available.
1281     // Transform (and x, C1)
1282     //        -> (<bfextract> x, msb, lsb)
1283     if (isC1Mask && !isC1ANDI) {
1284       const unsigned Msb = llvm::bit_width(C1) - 1;
1285       if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1286         return;
1287     }
1288 
1289     if (tryShrinkShlLogicImm(Node))
1290       return;
1291 
1292     break;
1293   }
1294   case ISD::MUL: {
1295     // Special case for calculating (mul (and X, C2), C1) where the full product
1296     // fits in XLen bits. We can shift X left by the number of leading zeros in
1297     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1298     // product has XLen trailing zeros, putting it in the output of MULHU. This
1299     // can avoid materializing a constant in a register for C2.
1300 
1301     // RHS should be a constant.
1302     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1303     if (!N1C || !N1C->hasOneUse())
1304       break;
1305 
1306     // LHS should be an AND with constant.
1307     SDValue N0 = Node->getOperand(0);
1308     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1309       break;
1310 
1311     uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
1312 
1313     // Constant should be a mask.
1314     if (!isMask_64(C2))
1315       break;
1316 
1317     // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1318     // multiple users or the constant is a simm12. This prevents inserting a
1319     // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1320     // make it more costly to materialize. Otherwise, using a SLLI might allow
1321     // it to be compressed.
1322     bool IsANDIOrZExt =
1323         isInt<12>(C2) ||
1324         (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1325     // With XTHeadBb, we can use TH.EXTU.
1326     IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1327     if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1328       break;
1329     // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1330     // the constant is a simm32.
1331     bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1332     // With XTHeadBb, we can use TH.EXTU.
1333     IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1334     if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1335       break;
1336 
1337     // We need to shift left the AND input and C1 by a total of XLen bits.
1338 
1339     // How far left do we need to shift the AND input?
1340     unsigned XLen = Subtarget->getXLen();
1341     unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1342 
1343     // The constant gets shifted by the remaining amount unless that would
1344     // shift bits out.
1345     uint64_t C1 = N1C->getZExtValue();
1346     unsigned ConstantShift = XLen - LeadingZeros;
1347     if (ConstantShift > (XLen - llvm::bit_width(C1)))
1348       break;
1349 
1350     uint64_t ShiftedC1 = C1 << ConstantShift;
1351     // If this RV32, we need to sign extend the constant.
1352     if (XLen == 32)
1353       ShiftedC1 = SignExtend64<32>(ShiftedC1);
1354 
1355     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1356     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1357     SDNode *SLLI =
1358         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1359                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1360     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1361                                            SDValue(SLLI, 0), SDValue(Imm, 0));
1362     ReplaceNode(Node, MULHU);
1363     return;
1364   }
1365   case ISD::LOAD: {
1366     if (tryIndexedLoad(Node))
1367       return;
1368     break;
1369   }
1370   case ISD::INTRINSIC_WO_CHAIN: {
1371     unsigned IntNo = Node->getConstantOperandVal(0);
1372     switch (IntNo) {
1373       // By default we do not custom select any intrinsic.
1374     default:
1375       break;
1376     case Intrinsic::riscv_vmsgeu:
1377     case Intrinsic::riscv_vmsge: {
1378       SDValue Src1 = Node->getOperand(1);
1379       SDValue Src2 = Node->getOperand(2);
1380       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1381       bool IsCmpUnsignedZero = false;
1382       // Only custom select scalar second operand.
1383       if (Src2.getValueType() != XLenVT)
1384         break;
1385       // Small constants are handled with patterns.
1386       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1387         int64_t CVal = C->getSExtValue();
1388         if (CVal >= -15 && CVal <= 16) {
1389           if (!IsUnsigned || CVal != 0)
1390             break;
1391           IsCmpUnsignedZero = true;
1392         }
1393       }
1394       MVT Src1VT = Src1.getSimpleValueType();
1395       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1396       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1397       default:
1398         llvm_unreachable("Unexpected LMUL!");
1399 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
1400   case RISCVII::VLMUL::lmulenum:                                               \
1401     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1402                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1403     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1404     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1405     break;
1406         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1407         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1408         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1409         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1410         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1411         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1412         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1413 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1414       }
1415       SDValue SEW = CurDAG->getTargetConstant(
1416           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1417       SDValue VL;
1418       selectVLOp(Node->getOperand(3), VL);
1419 
1420       // If vmsgeu with 0 immediate, expand it to vmset.
1421       if (IsCmpUnsignedZero) {
1422         ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1423         return;
1424       }
1425 
1426       // Expand to
1427       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1428       SDValue Cmp = SDValue(
1429           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1430           0);
1431       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1432                                                {Cmp, Cmp, VL, SEW}));
1433       return;
1434     }
1435     case Intrinsic::riscv_vmsgeu_mask:
1436     case Intrinsic::riscv_vmsge_mask: {
1437       SDValue Src1 = Node->getOperand(2);
1438       SDValue Src2 = Node->getOperand(3);
1439       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1440       bool IsCmpUnsignedZero = false;
1441       // Only custom select scalar second operand.
1442       if (Src2.getValueType() != XLenVT)
1443         break;
1444       // Small constants are handled with patterns.
1445       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1446         int64_t CVal = C->getSExtValue();
1447         if (CVal >= -15 && CVal <= 16) {
1448           if (!IsUnsigned || CVal != 0)
1449             break;
1450           IsCmpUnsignedZero = true;
1451         }
1452       }
1453       MVT Src1VT = Src1.getSimpleValueType();
1454       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1455           VMOROpcode;
1456       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1457       default:
1458         llvm_unreachable("Unexpected LMUL!");
1459 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
1460   case RISCVII::VLMUL::lmulenum:                                               \
1461     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1462                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1463     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1464                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1465     break;
1466         CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1467         CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1468         CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1469         CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1470         CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1471         CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1472         CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1473 #undef CASE_VMSLT_OPCODES
1474       }
1475       // Mask operations use the LMUL from the mask type.
1476       switch (RISCVTargetLowering::getLMUL(VT)) {
1477       default:
1478         llvm_unreachable("Unexpected LMUL!");
1479 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
1480   case RISCVII::VLMUL::lmulenum:                                               \
1481     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1482     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1483     VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
1484     break;
1485         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1486         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1487         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1488         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1489         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1490         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1491         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1492 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1493       }
1494       SDValue SEW = CurDAG->getTargetConstant(
1495           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1496       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1497       SDValue VL;
1498       selectVLOp(Node->getOperand(5), VL);
1499       SDValue MaskedOff = Node->getOperand(1);
1500       SDValue Mask = Node->getOperand(4);
1501 
1502       // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1503       if (IsCmpUnsignedZero) {
1504         // We don't need vmor if the MaskedOff and the Mask are the same
1505         // value.
1506         if (Mask == MaskedOff) {
1507           ReplaceUses(Node, Mask.getNode());
1508           return;
1509         }
1510         ReplaceNode(Node,
1511                     CurDAG->getMachineNode(VMOROpcode, DL, VT,
1512                                            {Mask, MaskedOff, VL, MaskSEW}));
1513         return;
1514       }
1515 
1516       // If the MaskedOff value and the Mask are the same value use
1517       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1518       // This avoids needing to copy v0 to vd before starting the next sequence.
1519       if (Mask == MaskedOff) {
1520         SDValue Cmp = SDValue(
1521             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1522             0);
1523         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1524                                                  {Mask, Cmp, VL, MaskSEW}));
1525         return;
1526       }
1527 
1528       // Mask needs to be copied to V0.
1529       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1530                                            RISCV::V0, Mask, SDValue());
1531       SDValue Glue = Chain.getValue(1);
1532       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1533 
1534       // Otherwise use
1535       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1536       // The result is mask undisturbed.
1537       // We use the same instructions to emulate mask agnostic behavior, because
1538       // the agnostic result can be either undisturbed or all 1.
1539       SDValue Cmp = SDValue(
1540           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1541                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1542           0);
1543       // vmxor.mm vd, vd, v0 is used to update active value.
1544       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1545                                                {Cmp, Mask, VL, MaskSEW}));
1546       return;
1547     }
1548     case Intrinsic::riscv_vsetvli:
1549     case Intrinsic::riscv_vsetvlimax:
1550       return selectVSETVLI(Node);
1551     }
1552     break;
1553   }
1554   case ISD::INTRINSIC_W_CHAIN: {
1555     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1556     switch (IntNo) {
1557       // By default we do not custom select any intrinsic.
1558     default:
1559       break;
1560     case Intrinsic::riscv_vlseg2:
1561     case Intrinsic::riscv_vlseg3:
1562     case Intrinsic::riscv_vlseg4:
1563     case Intrinsic::riscv_vlseg5:
1564     case Intrinsic::riscv_vlseg6:
1565     case Intrinsic::riscv_vlseg7:
1566     case Intrinsic::riscv_vlseg8: {
1567       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1568       return;
1569     }
1570     case Intrinsic::riscv_vlseg2_mask:
1571     case Intrinsic::riscv_vlseg3_mask:
1572     case Intrinsic::riscv_vlseg4_mask:
1573     case Intrinsic::riscv_vlseg5_mask:
1574     case Intrinsic::riscv_vlseg6_mask:
1575     case Intrinsic::riscv_vlseg7_mask:
1576     case Intrinsic::riscv_vlseg8_mask: {
1577       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1578       return;
1579     }
1580     case Intrinsic::riscv_vlsseg2:
1581     case Intrinsic::riscv_vlsseg3:
1582     case Intrinsic::riscv_vlsseg4:
1583     case Intrinsic::riscv_vlsseg5:
1584     case Intrinsic::riscv_vlsseg6:
1585     case Intrinsic::riscv_vlsseg7:
1586     case Intrinsic::riscv_vlsseg8: {
1587       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1588       return;
1589     }
1590     case Intrinsic::riscv_vlsseg2_mask:
1591     case Intrinsic::riscv_vlsseg3_mask:
1592     case Intrinsic::riscv_vlsseg4_mask:
1593     case Intrinsic::riscv_vlsseg5_mask:
1594     case Intrinsic::riscv_vlsseg6_mask:
1595     case Intrinsic::riscv_vlsseg7_mask:
1596     case Intrinsic::riscv_vlsseg8_mask: {
1597       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1598       return;
1599     }
1600     case Intrinsic::riscv_vloxseg2:
1601     case Intrinsic::riscv_vloxseg3:
1602     case Intrinsic::riscv_vloxseg4:
1603     case Intrinsic::riscv_vloxseg5:
1604     case Intrinsic::riscv_vloxseg6:
1605     case Intrinsic::riscv_vloxseg7:
1606     case Intrinsic::riscv_vloxseg8:
1607       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1608       return;
1609     case Intrinsic::riscv_vluxseg2:
1610     case Intrinsic::riscv_vluxseg3:
1611     case Intrinsic::riscv_vluxseg4:
1612     case Intrinsic::riscv_vluxseg5:
1613     case Intrinsic::riscv_vluxseg6:
1614     case Intrinsic::riscv_vluxseg7:
1615     case Intrinsic::riscv_vluxseg8:
1616       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1617       return;
1618     case Intrinsic::riscv_vloxseg2_mask:
1619     case Intrinsic::riscv_vloxseg3_mask:
1620     case Intrinsic::riscv_vloxseg4_mask:
1621     case Intrinsic::riscv_vloxseg5_mask:
1622     case Intrinsic::riscv_vloxseg6_mask:
1623     case Intrinsic::riscv_vloxseg7_mask:
1624     case Intrinsic::riscv_vloxseg8_mask:
1625       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1626       return;
1627     case Intrinsic::riscv_vluxseg2_mask:
1628     case Intrinsic::riscv_vluxseg3_mask:
1629     case Intrinsic::riscv_vluxseg4_mask:
1630     case Intrinsic::riscv_vluxseg5_mask:
1631     case Intrinsic::riscv_vluxseg6_mask:
1632     case Intrinsic::riscv_vluxseg7_mask:
1633     case Intrinsic::riscv_vluxseg8_mask:
1634       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1635       return;
1636     case Intrinsic::riscv_vlseg8ff:
1637     case Intrinsic::riscv_vlseg7ff:
1638     case Intrinsic::riscv_vlseg6ff:
1639     case Intrinsic::riscv_vlseg5ff:
1640     case Intrinsic::riscv_vlseg4ff:
1641     case Intrinsic::riscv_vlseg3ff:
1642     case Intrinsic::riscv_vlseg2ff: {
1643       selectVLSEGFF(Node, /*IsMasked*/ false);
1644       return;
1645     }
1646     case Intrinsic::riscv_vlseg8ff_mask:
1647     case Intrinsic::riscv_vlseg7ff_mask:
1648     case Intrinsic::riscv_vlseg6ff_mask:
1649     case Intrinsic::riscv_vlseg5ff_mask:
1650     case Intrinsic::riscv_vlseg4ff_mask:
1651     case Intrinsic::riscv_vlseg3ff_mask:
1652     case Intrinsic::riscv_vlseg2ff_mask: {
1653       selectVLSEGFF(Node, /*IsMasked*/ true);
1654       return;
1655     }
1656     case Intrinsic::riscv_vloxei:
1657     case Intrinsic::riscv_vloxei_mask:
1658     case Intrinsic::riscv_vluxei:
1659     case Intrinsic::riscv_vluxei_mask: {
1660       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1661                       IntNo == Intrinsic::riscv_vluxei_mask;
1662       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1663                        IntNo == Intrinsic::riscv_vloxei_mask;
1664 
1665       MVT VT = Node->getSimpleValueType(0);
1666       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1667 
1668       unsigned CurOp = 2;
1669       SmallVector<SDValue, 8> Operands;
1670       Operands.push_back(Node->getOperand(CurOp++));
1671 
1672       MVT IndexVT;
1673       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1674                                  /*IsStridedOrIndexed*/ true, Operands,
1675                                  /*IsLoad=*/true, &IndexVT);
1676 
1677       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1678              "Element count mismatch");
1679 
1680       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1681       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1682       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1683       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1684         report_fatal_error("The V extension does not support EEW=64 for index "
1685                            "values when XLEN=32");
1686       }
1687       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1688           IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1689           static_cast<unsigned>(IndexLMUL));
1690       MachineSDNode *Load =
1691           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1692 
1693       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1694         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1695 
1696       ReplaceNode(Node, Load);
1697       return;
1698     }
1699     case Intrinsic::riscv_vlm:
1700     case Intrinsic::riscv_vle:
1701     case Intrinsic::riscv_vle_mask:
1702     case Intrinsic::riscv_vlse:
1703     case Intrinsic::riscv_vlse_mask: {
1704       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1705                       IntNo == Intrinsic::riscv_vlse_mask;
1706       bool IsStrided =
1707           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1708 
1709       MVT VT = Node->getSimpleValueType(0);
1710       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1711 
1712       // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1713       // operand at the IR level.  In pseudos, it has both a policy and a
1714       // passthru operand. The passthru operand is needed to track the
1715       // "tail undefined" state, and the policy is there just for
1716       // consistency - it will always be "don't care" for the
1717       // unmasked form.
1718       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1719       unsigned CurOp = 2;
1720       SmallVector<SDValue, 8> Operands;
1721       if (HasPassthruOperand)
1722         Operands.push_back(Node->getOperand(CurOp++));
1723       else {
1724         // We eagerly lower to implicit_def (instead of undef), as we
1725         // otherwise fail to select nodes such as: nxv1i1 = undef
1726         SDNode *Passthru =
1727           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1728         Operands.push_back(SDValue(Passthru, 0));
1729       }
1730       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1731                                  Operands, /*IsLoad=*/true);
1732 
1733       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1734       const RISCV::VLEPseudo *P =
1735           RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1736                               static_cast<unsigned>(LMUL));
1737       MachineSDNode *Load =
1738           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1739 
1740       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1741         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1742 
1743       ReplaceNode(Node, Load);
1744       return;
1745     }
1746     case Intrinsic::riscv_vleff:
1747     case Intrinsic::riscv_vleff_mask: {
1748       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1749 
1750       MVT VT = Node->getSimpleValueType(0);
1751       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1752 
1753       unsigned CurOp = 2;
1754       SmallVector<SDValue, 7> Operands;
1755       Operands.push_back(Node->getOperand(CurOp++));
1756       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1757                                  /*IsStridedOrIndexed*/ false, Operands,
1758                                  /*IsLoad=*/true);
1759 
1760       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1761       const RISCV::VLEPseudo *P =
1762           RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1763                               Log2SEW, static_cast<unsigned>(LMUL));
1764       MachineSDNode *Load = CurDAG->getMachineNode(
1765           P->Pseudo, DL, Node->getVTList(), Operands);
1766       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1767         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1768 
1769       ReplaceNode(Node, Load);
1770       return;
1771     }
1772     }
1773     break;
1774   }
1775   case ISD::INTRINSIC_VOID: {
1776     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
1777     switch (IntNo) {
1778     case Intrinsic::riscv_vsseg2:
1779     case Intrinsic::riscv_vsseg3:
1780     case Intrinsic::riscv_vsseg4:
1781     case Intrinsic::riscv_vsseg5:
1782     case Intrinsic::riscv_vsseg6:
1783     case Intrinsic::riscv_vsseg7:
1784     case Intrinsic::riscv_vsseg8: {
1785       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1786       return;
1787     }
1788     case Intrinsic::riscv_vsseg2_mask:
1789     case Intrinsic::riscv_vsseg3_mask:
1790     case Intrinsic::riscv_vsseg4_mask:
1791     case Intrinsic::riscv_vsseg5_mask:
1792     case Intrinsic::riscv_vsseg6_mask:
1793     case Intrinsic::riscv_vsseg7_mask:
1794     case Intrinsic::riscv_vsseg8_mask: {
1795       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1796       return;
1797     }
1798     case Intrinsic::riscv_vssseg2:
1799     case Intrinsic::riscv_vssseg3:
1800     case Intrinsic::riscv_vssseg4:
1801     case Intrinsic::riscv_vssseg5:
1802     case Intrinsic::riscv_vssseg6:
1803     case Intrinsic::riscv_vssseg7:
1804     case Intrinsic::riscv_vssseg8: {
1805       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1806       return;
1807     }
1808     case Intrinsic::riscv_vssseg2_mask:
1809     case Intrinsic::riscv_vssseg3_mask:
1810     case Intrinsic::riscv_vssseg4_mask:
1811     case Intrinsic::riscv_vssseg5_mask:
1812     case Intrinsic::riscv_vssseg6_mask:
1813     case Intrinsic::riscv_vssseg7_mask:
1814     case Intrinsic::riscv_vssseg8_mask: {
1815       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1816       return;
1817     }
1818     case Intrinsic::riscv_vsoxseg2:
1819     case Intrinsic::riscv_vsoxseg3:
1820     case Intrinsic::riscv_vsoxseg4:
1821     case Intrinsic::riscv_vsoxseg5:
1822     case Intrinsic::riscv_vsoxseg6:
1823     case Intrinsic::riscv_vsoxseg7:
1824     case Intrinsic::riscv_vsoxseg8:
1825       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1826       return;
1827     case Intrinsic::riscv_vsuxseg2:
1828     case Intrinsic::riscv_vsuxseg3:
1829     case Intrinsic::riscv_vsuxseg4:
1830     case Intrinsic::riscv_vsuxseg5:
1831     case Intrinsic::riscv_vsuxseg6:
1832     case Intrinsic::riscv_vsuxseg7:
1833     case Intrinsic::riscv_vsuxseg8:
1834       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1835       return;
1836     case Intrinsic::riscv_vsoxseg2_mask:
1837     case Intrinsic::riscv_vsoxseg3_mask:
1838     case Intrinsic::riscv_vsoxseg4_mask:
1839     case Intrinsic::riscv_vsoxseg5_mask:
1840     case Intrinsic::riscv_vsoxseg6_mask:
1841     case Intrinsic::riscv_vsoxseg7_mask:
1842     case Intrinsic::riscv_vsoxseg8_mask:
1843       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1844       return;
1845     case Intrinsic::riscv_vsuxseg2_mask:
1846     case Intrinsic::riscv_vsuxseg3_mask:
1847     case Intrinsic::riscv_vsuxseg4_mask:
1848     case Intrinsic::riscv_vsuxseg5_mask:
1849     case Intrinsic::riscv_vsuxseg6_mask:
1850     case Intrinsic::riscv_vsuxseg7_mask:
1851     case Intrinsic::riscv_vsuxseg8_mask:
1852       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1853       return;
1854     case Intrinsic::riscv_vsoxei:
1855     case Intrinsic::riscv_vsoxei_mask:
1856     case Intrinsic::riscv_vsuxei:
1857     case Intrinsic::riscv_vsuxei_mask: {
1858       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
1859                       IntNo == Intrinsic::riscv_vsuxei_mask;
1860       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
1861                        IntNo == Intrinsic::riscv_vsoxei_mask;
1862 
1863       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1864       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1865 
1866       unsigned CurOp = 2;
1867       SmallVector<SDValue, 8> Operands;
1868       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1869 
1870       MVT IndexVT;
1871       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1872                                  /*IsStridedOrIndexed*/ true, Operands,
1873                                  /*IsLoad=*/false, &IndexVT);
1874 
1875       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1876              "Element count mismatch");
1877 
1878       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1879       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1880       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1881       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1882         report_fatal_error("The V extension does not support EEW=64 for index "
1883                            "values when XLEN=32");
1884       }
1885       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
1886           IsMasked, IsOrdered, IndexLog2EEW,
1887           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
1888       MachineSDNode *Store =
1889           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1890 
1891       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1892         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1893 
1894       ReplaceNode(Node, Store);
1895       return;
1896     }
1897     case Intrinsic::riscv_vsm:
1898     case Intrinsic::riscv_vse:
1899     case Intrinsic::riscv_vse_mask:
1900     case Intrinsic::riscv_vsse:
1901     case Intrinsic::riscv_vsse_mask: {
1902       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
1903                       IntNo == Intrinsic::riscv_vsse_mask;
1904       bool IsStrided =
1905           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
1906 
1907       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
1908       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1909 
1910       unsigned CurOp = 2;
1911       SmallVector<SDValue, 8> Operands;
1912       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
1913 
1914       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1915                                  Operands);
1916 
1917       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1918       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
1919           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
1920       MachineSDNode *Store =
1921           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1922       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1923         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
1924 
1925       ReplaceNode(Node, Store);
1926       return;
1927     }
1928     }
1929     break;
1930   }
1931   case ISD::BITCAST: {
1932     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
1933     // Just drop bitcasts between vectors if both are fixed or both are
1934     // scalable.
1935     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
1936         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
1937       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
1938       CurDAG->RemoveDeadNode(Node);
1939       return;
1940     }
1941     break;
1942   }
1943   case ISD::INSERT_SUBVECTOR: {
1944     SDValue V = Node->getOperand(0);
1945     SDValue SubV = Node->getOperand(1);
1946     SDLoc DL(SubV);
1947     auto Idx = Node->getConstantOperandVal(2);
1948     MVT SubVecVT = SubV.getSimpleValueType();
1949 
1950     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
1951     MVT SubVecContainerVT = SubVecVT;
1952     // Establish the correct scalable-vector types for any fixed-length type.
1953     if (SubVecVT.isFixedLengthVector())
1954       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
1955     if (VT.isFixedLengthVector())
1956       VT = TLI.getContainerForFixedLengthVector(VT);
1957 
1958     const auto *TRI = Subtarget->getRegisterInfo();
1959     unsigned SubRegIdx;
1960     std::tie(SubRegIdx, Idx) =
1961         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1962             VT, SubVecContainerVT, Idx, TRI);
1963 
1964     // If the Idx hasn't been completely eliminated then this is a subvector
1965     // insert which doesn't naturally align to a vector register. These must
1966     // be handled using instructions to manipulate the vector registers.
1967     if (Idx != 0)
1968       break;
1969 
1970     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
1971     bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
1972                            SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
1973                            SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
1974     (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
1975     assert((!IsSubVecPartReg || V.isUndef()) &&
1976            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
1977            "the subvector is smaller than a full-sized register");
1978 
1979     // If we haven't set a SubRegIdx, then we must be going between
1980     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
1981     if (SubRegIdx == RISCV::NoSubRegister) {
1982       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
1983       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
1984                  InRegClassID &&
1985              "Unexpected subvector extraction");
1986       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
1987       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1988                                                DL, VT, SubV, RC);
1989       ReplaceNode(Node, NewNode);
1990       return;
1991     }
1992 
1993     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
1994     ReplaceNode(Node, Insert.getNode());
1995     return;
1996   }
1997   case ISD::EXTRACT_SUBVECTOR: {
1998     SDValue V = Node->getOperand(0);
1999     auto Idx = Node->getConstantOperandVal(1);
2000     MVT InVT = V.getSimpleValueType();
2001     SDLoc DL(V);
2002 
2003     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2004     MVT SubVecContainerVT = VT;
2005     // Establish the correct scalable-vector types for any fixed-length type.
2006     if (VT.isFixedLengthVector())
2007       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2008     if (InVT.isFixedLengthVector())
2009       InVT = TLI.getContainerForFixedLengthVector(InVT);
2010 
2011     const auto *TRI = Subtarget->getRegisterInfo();
2012     unsigned SubRegIdx;
2013     std::tie(SubRegIdx, Idx) =
2014         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2015             InVT, SubVecContainerVT, Idx, TRI);
2016 
2017     // If the Idx hasn't been completely eliminated then this is a subvector
2018     // extract which doesn't naturally align to a vector register. These must
2019     // be handled using instructions to manipulate the vector registers.
2020     if (Idx != 0)
2021       break;
2022 
2023     // If we haven't set a SubRegIdx, then we must be going between
2024     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2025     if (SubRegIdx == RISCV::NoSubRegister) {
2026       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2027       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2028                  InRegClassID &&
2029              "Unexpected subvector extraction");
2030       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2031       SDNode *NewNode =
2032           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2033       ReplaceNode(Node, NewNode);
2034       return;
2035     }
2036 
2037     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2038     ReplaceNode(Node, Extract.getNode());
2039     return;
2040   }
2041   case RISCVISD::VMV_S_X_VL:
2042   case RISCVISD::VFMV_S_F_VL:
2043   case RISCVISD::VMV_V_X_VL:
2044   case RISCVISD::VFMV_V_F_VL: {
2045     // Try to match splat of a scalar load to a strided load with stride of x0.
2046     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2047                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2048     if (!Node->getOperand(0).isUndef())
2049       break;
2050     SDValue Src = Node->getOperand(1);
2051     auto *Ld = dyn_cast<LoadSDNode>(Src);
2052     // Can't fold load update node because the second
2053     // output is used so that load update node can't be removed.
2054     if (!Ld || Ld->isIndexed())
2055       break;
2056     EVT MemVT = Ld->getMemoryVT();
2057     // The memory VT should be the same size as the element type.
2058     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2059       break;
2060     if (!IsProfitableToFold(Src, Node, Node) ||
2061         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2062       break;
2063 
2064     SDValue VL;
2065     if (IsScalarMove) {
2066       // We could deal with more VL if we update the VSETVLI insert pass to
2067       // avoid introducing more VSETVLI.
2068       if (!isOneConstant(Node->getOperand(2)))
2069         break;
2070       selectVLOp(Node->getOperand(2), VL);
2071     } else
2072       selectVLOp(Node->getOperand(2), VL);
2073 
2074     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2075     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2076 
2077     // If VL=1, then we don't need to do a strided load and can just do a
2078     // regular load.
2079     bool IsStrided = !isOneConstant(VL);
2080 
2081     // Only do a strided load if we have optimized zero-stride vector load.
2082     if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2083       break;
2084 
2085     SmallVector<SDValue> Operands =
2086       {CurDAG->getUNDEF(VT), Ld->getBasePtr()};
2087     if (IsStrided)
2088       Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2089     uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2090     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2091     Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2092 
2093     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2094     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2095         /*IsMasked*/ false, IsStrided, /*FF*/ false,
2096         Log2SEW, static_cast<unsigned>(LMUL));
2097     MachineSDNode *Load =
2098         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2099     // Update the chain.
2100     ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2101     // Record the mem-refs
2102     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2103     // Replace the splat with the vlse.
2104     ReplaceNode(Node, Load);
2105     return;
2106   }
2107   case ISD::PREFETCH:
2108     unsigned Locality = Node->getConstantOperandVal(3);
2109     if (Locality > 2)
2110       break;
2111 
2112     if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2113       MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2114       MMO->setFlags(MachineMemOperand::MONonTemporal);
2115 
2116       int NontemporalLevel = 0;
2117       switch (Locality) {
2118       case 0:
2119         NontemporalLevel = 3; // NTL.ALL
2120         break;
2121       case 1:
2122         NontemporalLevel = 1; // NTL.PALL
2123         break;
2124       case 2:
2125         NontemporalLevel = 0; // NTL.P1
2126         break;
2127       default:
2128         llvm_unreachable("unexpected locality value.");
2129       }
2130 
2131       if (NontemporalLevel & 0b1)
2132         MMO->setFlags(MONontemporalBit0);
2133       if (NontemporalLevel & 0b10)
2134         MMO->setFlags(MONontemporalBit1);
2135     }
2136     break;
2137   }
2138 
2139   // Select the default instruction.
2140   SelectCode(Node);
2141 }
2142 
2143 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2144     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
2145   // Always produce a register and immediate operand, as expected by
2146   // RISCVAsmPrinter::PrintAsmMemoryOperand.
2147   switch (ConstraintID) {
2148   case InlineAsm::Constraint_o:
2149   case InlineAsm::Constraint_m: {
2150     SDValue Op0, Op1;
2151     bool Found = SelectAddrRegImm(Op, Op0, Op1);
2152     assert(Found && "SelectAddrRegImm should always succeed");
2153     (void)Found;
2154     OutOps.push_back(Op0);
2155     OutOps.push_back(Op1);
2156     return false;
2157   }
2158   case InlineAsm::Constraint_A:
2159     OutOps.push_back(Op);
2160     OutOps.push_back(
2161         CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2162     return false;
2163   default:
2164     report_fatal_error("Unexpected asm memory constraint " +
2165                        InlineAsm::getMemConstraintName(ConstraintID));
2166   }
2167 
2168   return true;
2169 }
2170 
2171 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2172                                              SDValue &Offset) {
2173   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2174     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2175     Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2176     return true;
2177   }
2178 
2179   return false;
2180 }
2181 
2182 // Select a frame index and an optional immediate offset from an ADD or OR.
2183 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2184                                               SDValue &Offset) {
2185   if (SelectAddrFrameIndex(Addr, Base, Offset))
2186     return true;
2187 
2188   if (!CurDAG->isBaseWithConstantOffset(Addr))
2189     return false;
2190 
2191   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2192     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2193     if (isInt<12>(CVal)) {
2194       Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2195                                          Subtarget->getXLenVT());
2196       Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2197                                          Subtarget->getXLenVT());
2198       return true;
2199     }
2200   }
2201 
2202   return false;
2203 }
2204 
2205 // Fold constant addresses.
2206 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2207                                const MVT VT, const RISCVSubtarget *Subtarget,
2208                                SDValue Addr, SDValue &Base, SDValue &Offset) {
2209   if (!isa<ConstantSDNode>(Addr))
2210     return false;
2211 
2212   int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2213 
2214   // If the constant is a simm12, we can fold the whole constant and use X0 as
2215   // the base. If the constant can be materialized with LUI+simm12, use LUI as
2216   // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2217   int64_t Lo12 = SignExtend64<12>(CVal);
2218   int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2219   if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2220     if (Hi) {
2221       int64_t Hi20 = (Hi >> 12) & 0xfffff;
2222       Base = SDValue(
2223           CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2224                                  CurDAG->getTargetConstant(Hi20, DL, VT)),
2225           0);
2226     } else {
2227       Base = CurDAG->getRegister(RISCV::X0, VT);
2228     }
2229     Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2230     return true;
2231   }
2232 
2233   // Ask how constant materialization would handle this constant.
2234   RISCVMatInt::InstSeq Seq =
2235       RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
2236 
2237   // If the last instruction would be an ADDI, we can fold its immediate and
2238   // emit the rest of the sequence as the base.
2239   if (Seq.back().getOpcode() != RISCV::ADDI)
2240     return false;
2241   Lo12 = Seq.back().getImm();
2242 
2243   // Drop the last instruction.
2244   Seq.pop_back();
2245   assert(!Seq.empty() && "Expected more instructions in sequence");
2246 
2247   Base = selectImmSeq(CurDAG, DL, VT, Seq);
2248   Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2249   return true;
2250 }
2251 
2252 // Is this ADD instruction only used as the base pointer of scalar loads and
2253 // stores?
2254 static bool isWorthFoldingAdd(SDValue Add) {
2255   for (auto *Use : Add->uses()) {
2256     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2257         Use->getOpcode() != ISD::ATOMIC_LOAD &&
2258         Use->getOpcode() != ISD::ATOMIC_STORE)
2259       return false;
2260     EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2261     if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2262         VT != MVT::f64)
2263       return false;
2264     // Don't allow stores of the value. It must be used as the address.
2265     if (Use->getOpcode() == ISD::STORE &&
2266         cast<StoreSDNode>(Use)->getValue() == Add)
2267       return false;
2268     if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2269         cast<AtomicSDNode>(Use)->getVal() == Add)
2270       return false;
2271   }
2272 
2273   return true;
2274 }
2275 
2276 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2277                                               unsigned MaxShiftAmount,
2278                                               SDValue &Base, SDValue &Index,
2279                                               SDValue &Scale) {
2280   EVT VT = Addr.getSimpleValueType();
2281   auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2282                                               SDValue &Shift) {
2283     uint64_t ShiftAmt = 0;
2284     Index = N;
2285 
2286     if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2287       // Only match shifts by a value in range [0, MaxShiftAmount].
2288       if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2289         Index = N.getOperand(0);
2290         ShiftAmt = N.getConstantOperandVal(1);
2291       }
2292     }
2293 
2294     Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2295     return ShiftAmt != 0;
2296   };
2297 
2298   if (Addr.getOpcode() == ISD::ADD) {
2299     if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2300       SDValue AddrB = Addr.getOperand(0);
2301       if (AddrB.getOpcode() == ISD::ADD &&
2302           UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2303           !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2304           isInt<12>(C1->getSExtValue())) {
2305         // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2306         SDValue C1Val =
2307             CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2308         Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2309                                               AddrB.getOperand(1), C1Val),
2310                        0);
2311         return true;
2312       }
2313     } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2314       Base = Addr.getOperand(1);
2315       return true;
2316     } else {
2317       UnwrapShl(Addr.getOperand(1), Index, Scale);
2318       Base = Addr.getOperand(0);
2319       return true;
2320     }
2321   } else if (UnwrapShl(Addr, Index, Scale)) {
2322     EVT VT = Addr.getValueType();
2323     Base = CurDAG->getRegister(RISCV::X0, VT);
2324     return true;
2325   }
2326 
2327   return false;
2328 }
2329 
2330 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2331                                          SDValue &Offset, bool IsINX) {
2332   if (SelectAddrFrameIndex(Addr, Base, Offset))
2333     return true;
2334 
2335   SDLoc DL(Addr);
2336   MVT VT = Addr.getSimpleValueType();
2337 
2338   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2339     Base = Addr.getOperand(0);
2340     Offset = Addr.getOperand(1);
2341     return true;
2342   }
2343 
2344   int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2345   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2346     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2347     if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2348       Base = Addr.getOperand(0);
2349       if (Base.getOpcode() == RISCVISD::ADD_LO) {
2350         SDValue LoOperand = Base.getOperand(1);
2351         if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2352           // If the Lo in (ADD_LO hi, lo) is a global variable's address
2353           // (its low part, really), then we can rely on the alignment of that
2354           // variable to provide a margin of safety before low part can overflow
2355           // the 12 bits of the load/store offset. Check if CVal falls within
2356           // that margin; if so (low part + CVal) can't overflow.
2357           const DataLayout &DL = CurDAG->getDataLayout();
2358           Align Alignment = commonAlignment(
2359               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2360           if (CVal == 0 || Alignment > CVal) {
2361             int64_t CombinedOffset = CVal + GA->getOffset();
2362             Base = Base.getOperand(0);
2363             Offset = CurDAG->getTargetGlobalAddress(
2364                 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2365                 CombinedOffset, GA->getTargetFlags());
2366             return true;
2367           }
2368         }
2369       }
2370 
2371       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2372         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2373       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2374       return true;
2375     }
2376   }
2377 
2378   // Handle ADD with large immediates.
2379   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2380     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2381     assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2382            "simm12 not already handled?");
2383 
2384     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2385     // an ADDI for part of the offset and fold the rest into the load/store.
2386     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2387     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2388       int64_t Adj = CVal < 0 ? -2048 : 2047;
2389       Base = SDValue(
2390           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2391                                  CurDAG->getTargetConstant(Adj, DL, VT)),
2392           0);
2393       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2394       return true;
2395     }
2396 
2397     // For larger immediates, we might be able to save one instruction from
2398     // constant materialization by folding the Lo12 bits of the immediate into
2399     // the address. We should only do this if the ADD is only used by loads and
2400     // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2401     // separately with the full materialized immediate creating extra
2402     // instructions.
2403     if (isWorthFoldingAdd(Addr) &&
2404         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2405                            Offset)) {
2406       // Insert an ADD instruction with the materialized Hi52 bits.
2407       Base = SDValue(
2408           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2409           0);
2410       return true;
2411     }
2412   }
2413 
2414   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2415     return true;
2416 
2417   Base = Addr;
2418   Offset = CurDAG->getTargetConstant(0, DL, VT);
2419   return true;
2420 }
2421 
2422 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2423                                         SDValue &ShAmt) {
2424   ShAmt = N;
2425 
2426   // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2427   // amount. If there is an AND on the shift amount, we can bypass it if it
2428   // doesn't affect any of those bits.
2429   if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2430     const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2431 
2432     // Since the max shift amount is a power of 2 we can subtract 1 to make a
2433     // mask that covers the bits needed to represent all shift amounts.
2434     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2435     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2436 
2437     if (ShMask.isSubsetOf(AndMask)) {
2438       ShAmt = ShAmt.getOperand(0);
2439     } else {
2440       // SimplifyDemandedBits may have optimized the mask so try restoring any
2441       // bits that are known zero.
2442       KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2443       if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2444         return true;
2445       ShAmt = ShAmt.getOperand(0);
2446     }
2447   }
2448 
2449   if (ShAmt.getOpcode() == ISD::ADD &&
2450       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2451     uint64_t Imm = ShAmt.getConstantOperandVal(1);
2452     // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2453     // to avoid the ADD.
2454     if (Imm != 0 && Imm % ShiftWidth == 0) {
2455       ShAmt = ShAmt.getOperand(0);
2456       return true;
2457     }
2458   } else if (ShAmt.getOpcode() == ISD::SUB &&
2459              isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2460     uint64_t Imm = ShAmt.getConstantOperandVal(0);
2461     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2462     // generate a NEG instead of a SUB of a constant.
2463     if (Imm != 0 && Imm % ShiftWidth == 0) {
2464       SDLoc DL(ShAmt);
2465       EVT VT = ShAmt.getValueType();
2466       SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2467       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2468       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2469                                                   ShAmt.getOperand(1));
2470       ShAmt = SDValue(Neg, 0);
2471       return true;
2472     }
2473     // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2474     // to generate a NOT instead of a SUB of a constant.
2475     if (Imm % ShiftWidth == ShiftWidth - 1) {
2476       SDLoc DL(ShAmt);
2477       EVT VT = ShAmt.getValueType();
2478       MachineSDNode *Not =
2479           CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2480                                  CurDAG->getTargetConstant(-1, DL, VT));
2481       ShAmt = SDValue(Not, 0);
2482       return true;
2483     }
2484   }
2485 
2486   return true;
2487 }
2488 
2489 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2490 /// check for equality with 0. This function emits instructions that convert the
2491 /// seteq/setne into something that can be compared with 0.
2492 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2493 /// ISD::SETNE).
2494 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2495                                     SDValue &Val) {
2496   assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2497          "Unexpected condition code!");
2498 
2499   // We're looking for a setcc.
2500   if (N->getOpcode() != ISD::SETCC)
2501     return false;
2502 
2503   // Must be an equality comparison.
2504   ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2505   if (CCVal != ExpectedCCVal)
2506     return false;
2507 
2508   SDValue LHS = N->getOperand(0);
2509   SDValue RHS = N->getOperand(1);
2510 
2511   if (!LHS.getValueType().isScalarInteger())
2512     return false;
2513 
2514   // If the RHS side is 0, we don't need any extra instructions, return the LHS.
2515   if (isNullConstant(RHS)) {
2516     Val = LHS;
2517     return true;
2518   }
2519 
2520   SDLoc DL(N);
2521 
2522   if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2523     int64_t CVal = C->getSExtValue();
2524     // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2525     // non-zero otherwise.
2526     if (CVal == -2048) {
2527       Val =
2528           SDValue(CurDAG->getMachineNode(
2529                       RISCV::XORI, DL, N->getValueType(0), LHS,
2530                       CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2531                   0);
2532       return true;
2533     }
2534     // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2535     // LHS is equal to the RHS and non-zero otherwise.
2536     if (isInt<12>(CVal) || CVal == 2048) {
2537       Val =
2538           SDValue(CurDAG->getMachineNode(
2539                       RISCV::ADDI, DL, N->getValueType(0), LHS,
2540                       CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2541                   0);
2542       return true;
2543     }
2544   }
2545 
2546   // If nothing else we can XOR the LHS and RHS to produce zero if they are
2547   // equal and a non-zero value if they aren't.
2548   Val = SDValue(
2549       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2550   return true;
2551 }
2552 
2553 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2554   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2555       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2556     Val = N.getOperand(0);
2557     return true;
2558   }
2559 
2560   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2561     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2562       return N;
2563 
2564     SDValue N0 = N.getOperand(0);
2565     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2566         N.getConstantOperandVal(1) == ShiftAmt &&
2567         N0.getConstantOperandVal(1) == ShiftAmt)
2568       return N0.getOperand(0);
2569 
2570     return N;
2571   };
2572 
2573   MVT VT = N.getSimpleValueType();
2574   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2575     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2576     return true;
2577   }
2578 
2579   return false;
2580 }
2581 
2582 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2583   if (N.getOpcode() == ISD::AND) {
2584     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2585     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2586       Val = N.getOperand(0);
2587       return true;
2588     }
2589   }
2590   MVT VT = N.getSimpleValueType();
2591   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2592   if (CurDAG->MaskedValueIsZero(N, Mask)) {
2593     Val = N;
2594     return true;
2595   }
2596 
2597   return false;
2598 }
2599 
2600 /// Look for various patterns that can be done with a SHL that can be folded
2601 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2602 /// SHXADD we are trying to match.
2603 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2604                                        SDValue &Val) {
2605   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2606     SDValue N0 = N.getOperand(0);
2607 
2608     bool LeftShift = N0.getOpcode() == ISD::SHL;
2609     if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2610         isa<ConstantSDNode>(N0.getOperand(1))) {
2611       uint64_t Mask = N.getConstantOperandVal(1);
2612       unsigned C2 = N0.getConstantOperandVal(1);
2613 
2614       unsigned XLen = Subtarget->getXLen();
2615       if (LeftShift)
2616         Mask &= maskTrailingZeros<uint64_t>(C2);
2617       else
2618         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2619 
2620       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2621       // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2622       // followed by a SHXADD with c3 for the X amount.
2623       if (isShiftedMask_64(Mask)) {
2624         unsigned Leading = XLen - llvm::bit_width(Mask);
2625         unsigned Trailing = llvm::countr_zero(Mask);
2626         if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2627           SDLoc DL(N);
2628           EVT VT = N.getValueType();
2629           Val = SDValue(CurDAG->getMachineNode(
2630                             RISCV::SRLI, DL, VT, N0.getOperand(0),
2631                             CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2632                         0);
2633           return true;
2634         }
2635         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2636         // leading zeros and c3 trailing zeros. We can use an SRLI by C3
2637         // followed by a SHXADD using c3 for the X amount.
2638         if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2639           SDLoc DL(N);
2640           EVT VT = N.getValueType();
2641           Val = SDValue(
2642               CurDAG->getMachineNode(
2643                   RISCV::SRLI, DL, VT, N0.getOperand(0),
2644                   CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2645               0);
2646           return true;
2647         }
2648       }
2649     }
2650   }
2651 
2652   bool LeftShift = N.getOpcode() == ISD::SHL;
2653   if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2654       isa<ConstantSDNode>(N.getOperand(1))) {
2655     SDValue N0 = N.getOperand(0);
2656     if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2657         isa<ConstantSDNode>(N0.getOperand(1))) {
2658       uint64_t Mask = N0.getConstantOperandVal(1);
2659       if (isShiftedMask_64(Mask)) {
2660         unsigned C1 = N.getConstantOperandVal(1);
2661         unsigned XLen = Subtarget->getXLen();
2662         unsigned Leading = XLen - llvm::bit_width(Mask);
2663         unsigned Trailing = llvm::countr_zero(Mask);
2664         // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2665         // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2666         if (LeftShift && Leading == 32 && Trailing > 0 &&
2667             (Trailing + C1) == ShAmt) {
2668           SDLoc DL(N);
2669           EVT VT = N.getValueType();
2670           Val = SDValue(CurDAG->getMachineNode(
2671                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
2672                             CurDAG->getTargetConstant(Trailing, DL, VT)),
2673                         0);
2674           return true;
2675         }
2676         // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
2677         // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
2678         if (!LeftShift && Leading == 32 && Trailing > C1 &&
2679             (Trailing - C1) == ShAmt) {
2680           SDLoc DL(N);
2681           EVT VT = N.getValueType();
2682           Val = SDValue(CurDAG->getMachineNode(
2683                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
2684                             CurDAG->getTargetConstant(Trailing, DL, VT)),
2685                         0);
2686           return true;
2687         }
2688       }
2689     }
2690   }
2691 
2692   return false;
2693 }
2694 
2695 /// Look for various patterns that can be done with a SHL that can be folded
2696 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
2697 /// SHXADD_UW we are trying to match.
2698 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
2699                                           SDValue &Val) {
2700   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
2701       N.hasOneUse()) {
2702     SDValue N0 = N.getOperand(0);
2703     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2704         N0.hasOneUse()) {
2705       uint64_t Mask = N.getConstantOperandVal(1);
2706       unsigned C2 = N0.getConstantOperandVal(1);
2707 
2708       Mask &= maskTrailingZeros<uint64_t>(C2);
2709 
2710       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
2711       // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
2712       // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
2713       if (isShiftedMask_64(Mask)) {
2714         unsigned Leading = llvm::countl_zero(Mask);
2715         unsigned Trailing = llvm::countr_zero(Mask);
2716         if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
2717           SDLoc DL(N);
2718           EVT VT = N.getValueType();
2719           Val = SDValue(CurDAG->getMachineNode(
2720                             RISCV::SLLI, DL, VT, N0.getOperand(0),
2721                             CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
2722                         0);
2723           return true;
2724         }
2725       }
2726     }
2727   }
2728 
2729   return false;
2730 }
2731 
2732 // Return true if all users of this SDNode* only consume the lower \p Bits.
2733 // This can be used to form W instructions for add/sub/mul/shl even when the
2734 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
2735 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
2736 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
2737 // the add/sub/mul/shl to become non-W instructions. By checking the users we
2738 // may be able to use a W instruction and CSE with the other instruction if
2739 // this has happened. We could try to detect that the CSE opportunity exists
2740 // before doing this, but that would be more complicated.
2741 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
2742                                         const unsigned Depth) const {
2743   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
2744           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
2745           Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
2746           Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
2747           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
2748           isa<ConstantSDNode>(Node) || Depth != 0) &&
2749          "Unexpected opcode");
2750 
2751   if (Depth >= SelectionDAG::MaxRecursionDepth)
2752     return false;
2753 
2754   for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
2755     SDNode *User = *UI;
2756     // Users of this node should have already been instruction selected
2757     if (!User->isMachineOpcode())
2758       return false;
2759 
2760     // TODO: Add more opcodes?
2761     switch (User->getMachineOpcode()) {
2762     default:
2763       return false;
2764     case RISCV::ADDW:
2765     case RISCV::ADDIW:
2766     case RISCV::SUBW:
2767     case RISCV::MULW:
2768     case RISCV::SLLW:
2769     case RISCV::SLLIW:
2770     case RISCV::SRAW:
2771     case RISCV::SRAIW:
2772     case RISCV::SRLW:
2773     case RISCV::SRLIW:
2774     case RISCV::DIVW:
2775     case RISCV::DIVUW:
2776     case RISCV::REMW:
2777     case RISCV::REMUW:
2778     case RISCV::ROLW:
2779     case RISCV::RORW:
2780     case RISCV::RORIW:
2781     case RISCV::CLZW:
2782     case RISCV::CTZW:
2783     case RISCV::CPOPW:
2784     case RISCV::SLLI_UW:
2785     case RISCV::FMV_W_X:
2786     case RISCV::FCVT_H_W:
2787     case RISCV::FCVT_H_WU:
2788     case RISCV::FCVT_S_W:
2789     case RISCV::FCVT_S_WU:
2790     case RISCV::FCVT_D_W:
2791     case RISCV::FCVT_D_WU:
2792     case RISCV::TH_REVW:
2793     case RISCV::TH_SRRIW:
2794       if (Bits < 32)
2795         return false;
2796       break;
2797     case RISCV::SLL:
2798     case RISCV::SRA:
2799     case RISCV::SRL:
2800     case RISCV::ROL:
2801     case RISCV::ROR:
2802     case RISCV::BSET:
2803     case RISCV::BCLR:
2804     case RISCV::BINV:
2805       // Shift amount operands only use log2(Xlen) bits.
2806       if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
2807         return false;
2808       break;
2809     case RISCV::SLLI:
2810       // SLLI only uses the lower (XLen - ShAmt) bits.
2811       if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
2812         return false;
2813       break;
2814     case RISCV::ANDI:
2815       if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
2816         break;
2817       goto RecCheck;
2818     case RISCV::ORI: {
2819       uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
2820       if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
2821         break;
2822       [[fallthrough]];
2823     }
2824     case RISCV::AND:
2825     case RISCV::OR:
2826     case RISCV::XOR:
2827     case RISCV::XORI:
2828     case RISCV::ANDN:
2829     case RISCV::ORN:
2830     case RISCV::XNOR:
2831     case RISCV::SH1ADD:
2832     case RISCV::SH2ADD:
2833     case RISCV::SH3ADD:
2834     RecCheck:
2835       if (hasAllNBitUsers(User, Bits, Depth + 1))
2836         break;
2837       return false;
2838     case RISCV::SRLI: {
2839       unsigned ShAmt = User->getConstantOperandVal(1);
2840       // If we are shifting right by less than Bits, and users don't demand any
2841       // bits that were shifted into [Bits-1:0], then we can consider this as an
2842       // N-Bit user.
2843       if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
2844         break;
2845       return false;
2846     }
2847     case RISCV::SEXT_B:
2848     case RISCV::PACKH:
2849       if (Bits < 8)
2850         return false;
2851       break;
2852     case RISCV::SEXT_H:
2853     case RISCV::FMV_H_X:
2854     case RISCV::ZEXT_H_RV32:
2855     case RISCV::ZEXT_H_RV64:
2856     case RISCV::PACKW:
2857       if (Bits < 16)
2858         return false;
2859       break;
2860     case RISCV::PACK:
2861       if (Bits < (Subtarget->getXLen() / 2))
2862         return false;
2863       break;
2864     case RISCV::ADD_UW:
2865     case RISCV::SH1ADD_UW:
2866     case RISCV::SH2ADD_UW:
2867     case RISCV::SH3ADD_UW:
2868       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
2869       // 32 bits.
2870       if (UI.getOperandNo() != 0 || Bits < 32)
2871         return false;
2872       break;
2873     case RISCV::SB:
2874       if (UI.getOperandNo() != 0 || Bits < 8)
2875         return false;
2876       break;
2877     case RISCV::SH:
2878       if (UI.getOperandNo() != 0 || Bits < 16)
2879         return false;
2880       break;
2881     case RISCV::SW:
2882       if (UI.getOperandNo() != 0 || Bits < 32)
2883         return false;
2884       break;
2885     }
2886   }
2887 
2888   return true;
2889 }
2890 
2891 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
2892 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
2893                                         SDValue &Shl2) {
2894   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
2895     int64_t Offset = C->getSExtValue();
2896     int64_t Shift;
2897     for (Shift = 0; Shift < 4; Shift++)
2898       if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
2899         break;
2900 
2901     // Constant cannot be encoded.
2902     if (Shift == 4)
2903       return false;
2904 
2905     EVT Ty = N->getValueType(0);
2906     Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
2907     Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
2908     return true;
2909   }
2910 
2911   return false;
2912 }
2913 
2914 // Select VL as a 5 bit immediate or a value that will become a register. This
2915 // allows us to choose betwen VSETIVLI or VSETVLI later.
2916 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
2917   auto *C = dyn_cast<ConstantSDNode>(N);
2918   if (C && isUInt<5>(C->getZExtValue())) {
2919     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
2920                                    N->getValueType(0));
2921   } else if (C && C->isAllOnes()) {
2922     // Treat all ones as VLMax.
2923     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2924                                    N->getValueType(0));
2925   } else if (isa<RegisterSDNode>(N) &&
2926              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
2927     // All our VL operands use an operand that allows GPRNoX0 or an immediate
2928     // as the register class. Convert X0 to a special immediate to pass the
2929     // MachineVerifier. This is recognized specially by the vsetvli insertion
2930     // pass.
2931     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
2932                                    N->getValueType(0));
2933   } else {
2934     VL = N;
2935   }
2936 
2937   return true;
2938 }
2939 
2940 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
2941   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
2942     return false;
2943   assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2944   SplatVal = N.getOperand(1);
2945   return true;
2946 }
2947 
2948 using ValidateFn = bool (*)(int64_t);
2949 
2950 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
2951                                    SelectionDAG &DAG,
2952                                    const RISCVSubtarget &Subtarget,
2953                                    ValidateFn ValidateImm) {
2954   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
2955       !isa<ConstantSDNode>(N.getOperand(1)))
2956     return false;
2957   assert(N.getNumOperands() == 3 && "Unexpected number of operands");
2958 
2959   int64_t SplatImm =
2960       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
2961 
2962   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
2963   // type is wider than the resulting vector element type: an implicit
2964   // truncation first takes place. Therefore, perform a manual
2965   // truncation/sign-extension in order to ignore any truncated bits and catch
2966   // any zero-extended immediate.
2967   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
2968   // sign-extending to (XLenVT -1).
2969   MVT XLenVT = Subtarget.getXLenVT();
2970   assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
2971          "Unexpected splat operand type");
2972   MVT EltVT = N.getSimpleValueType().getVectorElementType();
2973   if (EltVT.bitsLT(XLenVT))
2974     SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
2975 
2976   if (!ValidateImm(SplatImm))
2977     return false;
2978 
2979   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
2980   return true;
2981 }
2982 
2983 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
2984   return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
2985                                 [](int64_t Imm) { return isInt<5>(Imm); });
2986 }
2987 
2988 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
2989   return selectVSplatSimmHelper(
2990       N, SplatVal, *CurDAG, *Subtarget,
2991       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
2992 }
2993 
2994 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
2995                                                       SDValue &SplatVal) {
2996   return selectVSplatSimmHelper(
2997       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
2998         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
2999       });
3000 }
3001 
3002 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3003                                          SDValue &SplatVal) {
3004   if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
3005       !isa<ConstantSDNode>(N.getOperand(1)))
3006     return false;
3007 
3008   int64_t SplatImm =
3009       cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
3010 
3011   if (!isUIntN(Bits, SplatImm))
3012     return false;
3013 
3014   SplatVal =
3015       CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
3016 
3017   return true;
3018 }
3019 
3020 bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
3021   if (N->getOpcode() == ISD::SIGN_EXTEND ||
3022       N->getOpcode() == ISD::ZERO_EXTEND) {
3023     if (!N.hasOneUse())
3024       return false;
3025     N = N->getOperand(0);
3026   }
3027   return selectVSplat(N, SplatVal);
3028 }
3029 
3030 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3031   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3032   if (!CFP)
3033     return false;
3034   const APFloat &APF = CFP->getValueAPF();
3035   // td can handle +0.0 already.
3036   if (APF.isPosZero())
3037     return false;
3038 
3039   MVT VT = CFP->getSimpleValueType(0);
3040 
3041   if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
3042                                                                       VT) >= 0)
3043     return false;
3044 
3045   MVT XLenVT = Subtarget->getXLenVT();
3046   if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3047     assert(APF.isNegZero() && "Unexpected constant.");
3048     return false;
3049   }
3050   SDLoc DL(N);
3051   Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3052                   *Subtarget);
3053   return true;
3054 }
3055 
3056 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3057                                        SDValue &Imm) {
3058   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3059     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3060 
3061     if (!isInt<5>(ImmVal))
3062       return false;
3063 
3064     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3065     return true;
3066   }
3067 
3068   return false;
3069 }
3070 
3071 // Try to remove sext.w if the input is a W instruction or can be made into
3072 // a W instruction cheaply.
3073 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3074   // Look for the sext.w pattern, addiw rd, rs1, 0.
3075   if (N->getMachineOpcode() != RISCV::ADDIW ||
3076       !isNullConstant(N->getOperand(1)))
3077     return false;
3078 
3079   SDValue N0 = N->getOperand(0);
3080   if (!N0.isMachineOpcode())
3081     return false;
3082 
3083   switch (N0.getMachineOpcode()) {
3084   default:
3085     break;
3086   case RISCV::ADD:
3087   case RISCV::ADDI:
3088   case RISCV::SUB:
3089   case RISCV::MUL:
3090   case RISCV::SLLI: {
3091     // Convert sext.w+add/sub/mul to their W instructions. This will create
3092     // a new independent instruction. This improves latency.
3093     unsigned Opc;
3094     switch (N0.getMachineOpcode()) {
3095     default:
3096       llvm_unreachable("Unexpected opcode!");
3097     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
3098     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3099     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
3100     case RISCV::MUL:  Opc = RISCV::MULW;  break;
3101     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3102     }
3103 
3104     SDValue N00 = N0.getOperand(0);
3105     SDValue N01 = N0.getOperand(1);
3106 
3107     // Shift amount needs to be uimm5.
3108     if (N0.getMachineOpcode() == RISCV::SLLI &&
3109         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3110       break;
3111 
3112     SDNode *Result =
3113         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3114                                N00, N01);
3115     ReplaceUses(N, Result);
3116     return true;
3117   }
3118   case RISCV::ADDW:
3119   case RISCV::ADDIW:
3120   case RISCV::SUBW:
3121   case RISCV::MULW:
3122   case RISCV::SLLIW:
3123   case RISCV::PACKW:
3124   case RISCV::TH_MULAW:
3125   case RISCV::TH_MULAH:
3126   case RISCV::TH_MULSW:
3127   case RISCV::TH_MULSH:
3128     // Result is already sign extended just remove the sext.w.
3129     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3130     ReplaceUses(N, N0.getNode());
3131     return true;
3132   }
3133 
3134   return false;
3135 }
3136 
3137 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3138   // Check that we're using V0 as a mask register.
3139   if (!isa<RegisterSDNode>(MaskOp) ||
3140       cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3141     return false;
3142 
3143   // The glued user defines V0.
3144   const auto *Glued = GlueOp.getNode();
3145 
3146   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3147     return false;
3148 
3149   // Check that we're defining V0 as a mask register.
3150   if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3151       cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3152     return false;
3153 
3154   // Check the instruction defining V0; it needs to be a VMSET pseudo.
3155   SDValue MaskSetter = Glued->getOperand(2);
3156 
3157   const auto IsVMSet = [](unsigned Opc) {
3158     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3159            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3160            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3161            Opc == RISCV::PseudoVMSET_M_B8;
3162   };
3163 
3164   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3165   // undefined behaviour if it's the wrong bitwidth, so we could choose to
3166   // assume that it's all-ones? Same applies to its VL.
3167   return MaskSetter->isMachineOpcode() &&
3168          IsVMSet(MaskSetter.getMachineOpcode());
3169 }
3170 
3171 // Return true if we can make sure mask of N is all-ones mask.
3172 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3173   return usesAllOnesMask(N->getOperand(MaskOpIdx),
3174                          N->getOperand(N->getNumOperands() - 1));
3175 }
3176 
3177 static bool isImplicitDef(SDValue V) {
3178   return V.isMachineOpcode() &&
3179          V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3180 }
3181 
3182 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
3183 // corresponding "unmasked" pseudo versions. The mask we're interested in will
3184 // take the form of a V0 physical register operand, with a glued
3185 // register-setting instruction.
3186 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
3187   const RISCV::RISCVMaskedPseudoInfo *I =
3188       RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3189   if (!I)
3190     return false;
3191 
3192   unsigned MaskOpIdx = I->MaskOpIdx;
3193   if (!usesAllOnesMask(N, MaskOpIdx))
3194     return false;
3195 
3196   // There are two classes of pseudos in the table - compares and
3197   // everything else.  See the comment on RISCVMaskedPseudo for details.
3198   const unsigned Opc = I->UnmaskedPseudo;
3199   const MCInstrDesc &MCID = TII->get(Opc);
3200   const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3201 #ifndef NDEBUG
3202   const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3203   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3204          RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3205          "Masked and unmasked pseudos are inconsistent");
3206   const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3207   assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3208 #endif
3209 
3210   SmallVector<SDValue, 8> Ops;
3211   // Skip the merge operand at index 0 if !UseTUPseudo.
3212   for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3213     // Skip the mask, and the Glue.
3214     SDValue Op = N->getOperand(I);
3215     if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3216       continue;
3217     Ops.push_back(Op);
3218   }
3219 
3220   // Transitively apply any node glued to our new node.
3221   const auto *Glued = N->getGluedNode();
3222   if (auto *TGlued = Glued->getGluedNode())
3223     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3224 
3225   SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3226   Result->setFlags(N->getFlags());
3227   ReplaceUses(N, Result);
3228 
3229   return true;
3230 }
3231 
3232 static bool IsVMerge(SDNode *N) {
3233   unsigned Opc = N->getMachineOpcode();
3234   return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
3235          Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
3236          Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
3237          Opc == RISCV::PseudoVMERGE_VVM_M1 ||
3238          Opc == RISCV::PseudoVMERGE_VVM_M2 ||
3239          Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
3240 }
3241 
3242 static bool IsVMv(SDNode *N) {
3243   unsigned Opc = N->getMachineOpcode();
3244   return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
3245          Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
3246          Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
3247          Opc == RISCV::PseudoVMV_V_V_M8;
3248 }
3249 
3250 static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3251   switch (LMUL) {
3252   case RISCVII::LMUL_F8:
3253     return RISCV::PseudoVMSET_M_B1;
3254   case RISCVII::LMUL_F4:
3255     return RISCV::PseudoVMSET_M_B2;
3256   case RISCVII::LMUL_F2:
3257     return RISCV::PseudoVMSET_M_B4;
3258   case RISCVII::LMUL_1:
3259     return RISCV::PseudoVMSET_M_B8;
3260   case RISCVII::LMUL_2:
3261     return RISCV::PseudoVMSET_M_B16;
3262   case RISCVII::LMUL_4:
3263     return RISCV::PseudoVMSET_M_B32;
3264   case RISCVII::LMUL_8:
3265     return RISCV::PseudoVMSET_M_B64;
3266   case RISCVII::LMUL_RESERVED:
3267     llvm_unreachable("Unexpected LMUL");
3268   }
3269   llvm_unreachable("Unknown VLMUL enum");
3270 }
3271 
3272 // Try to fold away VMERGE_VVM instructions. We handle these cases:
3273 // -Masked TU VMERGE_VVM combined with an unmasked TA instruction
3274 //  folds to a masked TU instruction. VMERGE_VVM must have merge operand
3275 //  same as false operand.
3276 // -Masked TA VMERGE_VVM combined with an unmasked TA instruction folds to a
3277 //  masked TA instruction.
3278 // -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to
3279 //  masked TU instruction. Both instructions must have the same merge operand.
3280 //  VMERGE_VVM must have merge operand same as false operand.
3281 // Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied,
3282 // not the pseudo name.  That is, a TA VMERGE_VVM can be either the _TU pseudo
3283 // form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
3284 // form.
     //
     // N must be a VMERGE_VVM or VMV_V_V pseudo (the latter is handled as a
     // vmerge with an all-ones mask). Returns true if the fold was performed,
     // in which case the uses of N's result 0 and of True's remaining results
     // have been redirected to the newly created masked node; returns false if
     // nothing was changed.
3285 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3286   SDValue Merge, False, True, VL, Mask, Glue;
3287   // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3288   if (IsVMv(N)) {
3289     Merge = N->getOperand(0);
3290     False = N->getOperand(0);
3291     True = N->getOperand(1);
3292     VL = N->getOperand(2);
3293     // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3294     // mask later below.
3295   } else {
3296     assert(IsVMerge(N));
3297     Merge = N->getOperand(0);
3298     False = N->getOperand(1);
3299     True = N->getOperand(2);
3300     Mask = N->getOperand(3);
3301     VL = N->getOperand(4);
3302     // We always have a glue node for the mask at v0.
3303     Glue = N->getOperand(N->getNumOperands() - 1);
3304   }
3305   assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3306   assert(!Glue || Glue.getValueType() == MVT::Glue);
3307 
3308   // We require that either merge and false are the same, or that merge
3309   // is undefined.
3310   if (Merge != False && !isImplicitDef(Merge))
3311     return false;
3312 
3313   assert(True.getResNo() == 0 &&
3314          "Expect True is the first output of an instruction.");
3315 
3316   // N must be the only user of True.
3317   if (!True.hasOneUse())
3318     return false;
3319 
3320   if (!True.isMachineOpcode())
3321     return false;
3322 
3323   unsigned TrueOpc = True.getMachineOpcode();
3324   const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3325   uint64_t TrueTSFlags = TrueMCID.TSFlags;
3326   bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3327 
       // First try to treat True as an unmasked pseudo that has a masked
       // counterpart. If that lookup fails and True has a tied destination,
       // try to look it up as an already-masked pseudo instead.
3328   bool IsMasked = false;
3329   const RISCV::RISCVMaskedPseudoInfo *Info =
3330       RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3331   if (!Info && HasTiedDest) {
3332     Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3333     IsMasked = true;
3334   }
3335 
3336   if (!Info)
3337     return false;
3338 
3339   if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3340     // The vmerge instruction must be TU.
3341     // FIXME: This could be relaxed, but we need to handle the policy for the
3342     // resulting op correctly.
3343     if (isImplicitDef(Merge))
3344       return false;
3345     SDValue MergeOpTrue = True->getOperand(0);
3346     // Both the vmerge instruction and the True instruction must have the same
3347     // merge operand.
3348     if (False != MergeOpTrue)
3349       return false;
3350   }
3351 
3352   if (IsMasked) {
3353     assert(HasTiedDest && "Expected tied dest");
3354     // The vmerge instruction must be TU.
3355     if (isImplicitDef(Merge))
3356       return false;
3357     // The vmerge instruction must have an all 1s mask since we're going to keep
3358     // the mask from the True instruction.
3359     // FIXME: Support mask agnostic True instruction which would have an
3360     // undef merge operand.
3361     if (Mask && !usesAllOnesMask(Mask, Glue))
3362       return false;
3363   }
3364 
3365   // Skip if True has side effect.
3366   // TODO: Support vleff and vlsegff.
3367   if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3368     return false;
3369 
3370   // The last operand of a masked instruction may be glued.
3371   bool HasGlueOp = True->getGluedNode() != nullptr;
3372 
3373   // The chain operand may exist either before the glued operands or in the last
3374   // position.
3375   unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3376   bool HasChainOp =
3377       True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3378 
3379   if (HasChainOp) {
3380     // Avoid creating cycles in the DAG. We must ensure that none of the other
3381     // operands depend on True through its chain.
3382     SmallVector<const SDNode *, 4> LoopWorklist;
3383     SmallPtrSet<const SDNode *, 16> Visited;
3384     LoopWorklist.push_back(False.getNode());
3385     if (Mask)
3386       LoopWorklist.push_back(Mask.getNode());
3387     LoopWorklist.push_back(VL.getNode());
3388     if (Glue)
3389       LoopWorklist.push_back(Glue.getNode());
3390     if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3391       return false;
3392   }
3393 
3394   // The vector policy operand may be present for masked intrinsics
       // Locate True's VL operand by counting back from the end of its operand
       // list past the optional glue, chain and policy operands; the SEW
       // operand directly follows VL.
3395   bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3396   unsigned TrueVLIndex =
3397       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3398   SDValue TrueVL = True.getOperand(TrueVLIndex);
3399   SDValue SEW = True.getOperand(TrueVLIndex + 1);
3400 
       // Pick the smaller of two VL operands when that can be decided here:
       // identical values, an all-ones constant (treated as the larger one;
       // presumably the VLMAX sentinel - confirm), or two constants compared
       // as unsigned. Returns a null SDValue when the minimum is unknown.
3401   auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3402     if (LHS == RHS)
3403       return LHS;
3404     if (isAllOnesConstant(LHS))
3405       return RHS;
3406     if (isAllOnesConstant(RHS))
3407       return LHS;
3408     auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3409     auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3410     if (!CLHS || !CRHS)
3411       return SDValue();
3412     return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3413   };
3414 
3415   // Because N and True must have the same merge operand (or True's operand is
3416   // implicit_def), the "effective" body is the minimum of their VLs.
3417   SDValue OrigVL = VL;
3418   VL = GetMinVL(TrueVL, VL);
3419   if (!VL)
3420     return false;
3421 
3422   // If we end up changing the VL or mask of True, then we need to make sure it
3423   // doesn't raise any observable fp exceptions, since changing the active
3424   // elements will affect how fflags is set.
3425   if (TrueVL != VL || !IsMasked)
3426     if (mayRaiseFPException(True.getNode()) &&
3427         !True->getFlags().hasNoFPExcept())
3428       return false;
3429 
3430   SDLoc DL(N);
3431 
3432   // From the preconditions we checked above, we know the mask and thus glue
3433   // for the result node will be taken from True.
3434   if (IsMasked) {
3435     Mask = True->getOperand(Info->MaskOpIdx);
3436     Glue = True->getOperand(True->getNumOperands() - 1);
3437     assert(Glue.getValueType() == MVT::Glue);
3438   }
3439   // Otherwise the mask of N is used. If N is actually a vmv.v.v it has no
3440   // mask operand, so create an all-ones mask to use.
3441   else if (IsVMv(N)) {
3442     unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3443     unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3444     ElementCount EC = N->getValueType(0).getVectorElementCount();
3445     MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3446 
3447     SDValue AllOnesMask =
3448         SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3449     SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3450                                             RISCV::V0, AllOnesMask, SDValue());
3451     Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3452     Glue = MaskCopy.getValue(1);
3453   }
3454 
3455   unsigned MaskedOpc = Info->MaskedPseudo;
3456 #ifndef NDEBUG
3457   const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3458   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3459          "Expected instructions with mask have policy operand.");
3460   assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3461                                          MCOI::TIED_TO) == 0 &&
3462          "Expected instructions with mask have a tied dest.");
3463 #endif
3464 
3465   // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3466   // operand is undefined.
3467   //
3468   // However, if the VL became smaller than what the vmerge had originally, then
3469   // elements past VL that were previously in the vmerge's body will have moved
3470   // to the tail. In that case we always need to use tail undisturbed to
3471   // preserve them.
3472   bool MergeVLShrunk = VL != OrigVL;
3473   uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3474                         ? RISCVII::TAIL_AGNOSTIC
3475                         : /*TUMU*/ 0;
3476   SDValue PolicyOp =
3477     CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3478 
3479 
       // Assemble the operand list of the masked node:
       //   False (as merge), True's ordinary operands, Mask, [rounding mode],
       //   VL, SEW, policy, [chain], glue.
3480   SmallVector<SDValue, 8> Ops;
3481   Ops.push_back(False);
3482 
3483   const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3484   const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3485   assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
       // Skip True's own tied merge operand (if any); False was pushed above.
3486   Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3487 
3488   Ops.push_back(Mask);
3489 
3490   // For unmasked "VOp" with rounding mode operand, that is interfaces like
3491   // (..., rm, vl) or (..., rm, vl, policy).
3492   // Its masked version is (..., vm, rm, vl, policy).
3493   // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3494   if (HasRoundingMode)
3495     Ops.push_back(True->getOperand(TrueVLIndex - 1));
3496 
3497   Ops.append({VL, SEW, PolicyOp});
3498 
3499   // Result node should have chain operand of True.
3500   if (HasChainOp)
3501     Ops.push_back(True.getOperand(TrueChainOpIdx));
3502 
3503   // Add the glue for the CopyToReg of mask->v0.
3504   Ops.push_back(Glue);
3505 
3506   SDNode *Result =
3507       CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3508   Result->setFlags(True->getFlags());
3509 
3510   // Replace vmerge.vvm node by Result.
3511   ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3512 
3513   // Replace the other values of True, e.g. chain and VL.
3514   for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3515     ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3516 
3517   // Try to transform Result to unmasked intrinsic.
3518   doPeepholeMaskedRVV(Result);
3519   return true;
3520 }
3521 
3522 // Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
3523 // (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
3524 bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
3525 #define CASE_VMERGE_TO_VMV(lmul)                                               \
3526   case RISCV::PseudoVMERGE_VVM_##lmul:                                    \
3527     NewOpc = RISCV::PseudoVMV_V_V_##lmul;                                 \
3528     break;
3529   unsigned NewOpc;
3530   switch (N->getMachineOpcode()) {
3531   default:
3532     llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
3533   CASE_VMERGE_TO_VMV(MF8)
3534   CASE_VMERGE_TO_VMV(MF4)
3535   CASE_VMERGE_TO_VMV(MF2)
3536   CASE_VMERGE_TO_VMV(M1)
3537   CASE_VMERGE_TO_VMV(M2)
3538   CASE_VMERGE_TO_VMV(M4)
3539   CASE_VMERGE_TO_VMV(M8)
3540   }
3541 
3542   if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
3543     return false;
3544 
3545   SDLoc DL(N);
3546   SDValue PolicyOp =
3547     CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
3548   SDNode *Result = CurDAG->getMachineNode(
3549       NewOpc, DL, N->getValueType(0),
3550       {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
3551        PolicyOp});
3552   ReplaceUses(N, Result);
3553   return true;
3554 }
3555 
3556 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3557   bool MadeChange = false;
3558   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3559 
3560   while (Position != CurDAG->allnodes_begin()) {
3561     SDNode *N = &*--Position;
3562     if (N->use_empty() || !N->isMachineOpcode())
3563       continue;
3564 
3565     if (IsVMerge(N) || IsVMv(N))
3566       MadeChange |= performCombineVMergeAndVOps(N);
3567     if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
3568       MadeChange |= performVMergeToVMv(N);
3569   }
3570   return MadeChange;
3571 }
3572 
3573 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
3574 // for instruction scheduling.
3575 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
3576                                        CodeGenOpt::Level OptLevel) {
3577   return new RISCVDAGToDAGISel(TM, OptLevel);
3578 }
3579 
// Definition of the pass's static ID member. Presumably only its address is
// used as the pass identifier, as is conventional for LLVM passes; the value
// 0 itself carries no meaning here.
3580 char RISCVDAGToDAGISel::ID = 0;
3581 
// Register the pass using DEBUG_TYPE ("riscv-isel") as its argument string
// and PASS_NAME as its description, both defined at the top of this file.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS - confirm.
3582 INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
3583