xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (revision 52418fc2be8efa5172b90a3a9e617017173612c4)
1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISC-V target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMCTargetDesc.h"
16 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVISelLowering.h"
18 #include "RISCVMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/IR/IntrinsicsRISCV.h"
21 #include "llvm/Support/Alignment.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "riscv-isel"
29 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30 
31 static cl::opt<bool> UsePseudoMovImm(
32     "riscv-use-rematerializable-movimm", cl::Hidden,
33     cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34              "constant materialization"),
35     cl::init(false));
36 
37 namespace llvm::RISCV {
38 #define GET_RISCVVSSEGTable_IMPL
39 #define GET_RISCVVLSEGTable_IMPL
40 #define GET_RISCVVLXSEGTable_IMPL
41 #define GET_RISCVVSXSEGTable_IMPL
42 #define GET_RISCVVLETable_IMPL
43 #define GET_RISCVVSETable_IMPL
44 #define GET_RISCVVLXTable_IMPL
45 #define GET_RISCVVSXTable_IMPL
46 #include "RISCVGenSearchableTables.inc"
47 } // namespace llvm::RISCV
48 
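// Rewrite target-independent nodes into RISC-V-specific forms before
// instruction selection: integer/FP SPLAT_VECTOR becomes VMV_V_X_VL /
// VFMV_V_F_VL, and SPLAT_VECTOR_SPLIT_I64_VL is expanded through a stack
// temporary and a stride-0 vector load.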
49 void RISCVDAGToDAGISel::PreprocessISelDAG() {
50   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51 
52   bool MadeChange = false;
53   while (Position != CurDAG->allnodes_begin()) {
54     SDNode *N = &*--Position;
55     if (N->use_empty())
56       continue;
57 
58     SDValue Result;
59     switch (N->getOpcode()) {
60     case ISD::SPLAT_VECTOR: {
61       // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62       // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63       MVT VT = N->getSimpleValueType(0);
64       unsigned Opc =
65           VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66       SDLoc DL(N);
67       SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68       SDValue Src = N->getOperand(0);
69       if (VT.isInteger())
70         Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71                               N->getOperand(0));
72       Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73       break;
74     }
75     case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76       // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77       // load. Done after lowering and combining so that we have a chance to
78       // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79       assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80       MVT VT = N->getSimpleValueType(0);
81       SDValue Passthru = N->getOperand(0);
82       SDValue Lo = N->getOperand(1);
83       SDValue Hi = N->getOperand(2);
84       SDValue VL = N->getOperand(3);
85       assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86              Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87              "Unexpected VTs!");
88       MachineFunction &MF = CurDAG->getMachineFunction();
89       SDLoc DL(N);
90 
91       // Create a temporary stack slot for each node being expanded.
92       SDValue StackSlot =
93           CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94       int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95       MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96 
97       SDValue Chain = CurDAG->getEntryNode();
98       Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99 
100       SDValue OffsetSlot =
101           CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102       Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103                             Align(8));
104 
105       Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106 
107       SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108       SDValue IntID =
109           CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110       SDValue Ops[] = {Chain,
111                        IntID,
112                        Passthru,
113                        StackSlot,
114                        CurDAG->getRegister(RISCV::X0, MVT::i64),
115                        VL};
116 
117       Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118                                            MVT::i64, MPI, Align(8),
119                                            MachineMemOperand::MOLoad);
120       break;
121     }
122     }
123 
124     if (Result) {
125       LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld:    ");
126       LLVM_DEBUG(N->dump(CurDAG));
127       LLVM_DEBUG(dbgs() << "\nNew: ");
128       LLVM_DEBUG(Result->dump(CurDAG));
129       LLVM_DEBUG(dbgs() << "\n");
130 
131       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132       MadeChange = true;
133     }
134   }
135 
136   if (MadeChange)
137     CurDAG->RemoveDeadNodes();
138 }
139 
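// Run peephole optimizations over the selected machine nodes: redundant
// sext.w removal, masked RVV pseudo simplification, vmerge folding, and
// conversion of IMPLICIT_DEF passthru operands to NoRegister.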
140 void RISCVDAGToDAGISel::PostprocessISelDAG() {
141   HandleSDNode Dummy(CurDAG->getRoot());
142   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143 
144   bool MadeChange = false;
145   while (Position != CurDAG->allnodes_begin()) {
146     SDNode *N = &*--Position;
147     // Skip dead nodes and any non-machine opcodes.
148     if (N->use_empty() || !N->isMachineOpcode())
149       continue;
150 
151     MadeChange |= doPeepholeSExtW(N);
152 
153     // FIXME: This is here only because the VMerge transform doesn't
154     // know how to handle masked true inputs.  Once that has been moved
155     // to post-ISEL, this can be deleted as well.
156     MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157   }
158 
159   CurDAG->setRoot(Dummy.getValue());
160 
161   MadeChange |= doPeepholeMergeVVMFold();
162 
163   // After we're done with everything else, convert IMPLICIT_DEF
164   // passthru operands to NoRegister.  This is required to workaround
165   // an optimization deficiency in MachineCSE.  This really should
166   // be merged back into each of the patterns (i.e. there's no good
167   // reason not to go directly to NoReg), but is being done this way
168   // to allow easy backporting.
169   MadeChange |= doPeepholeNoRegPassThru();
170 
171   if (MadeChange)
172     CurDAG->RemoveDeadNodes();
173 }
174 
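// Emit the machine nodes for a materialization sequence computed by
// RISCVMatInt, feeding each instruction's result into the next; only the
// first instruction starts from X0.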
175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176                             RISCVMatInt::InstSeq &Seq) {
177   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178   for (const RISCVMatInt::Inst &Inst : Seq) {
179     SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180     SDNode *Result = nullptr;
181     switch (Inst.getOpndKind()) {
182     case RISCVMatInt::Imm:
183       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184       break;
185     case RISCVMatInt::RegX0:
186       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187                                       CurDAG->getRegister(RISCV::X0, VT));
188       break;
189     case RISCVMatInt::RegReg:
190       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191       break;
192     case RISCVMatInt::RegImm:
193       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194       break;
195     }
196 
197     // Only the first instruction has X0 as its source.
198     SrcReg = SDValue(Result, 0);
199   }
200 
201   return SrcReg;
202 }
203 
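// Materialize the constant Imm into a register of type VT, using either the
// plain RISCVMatInt sequence, the rematerializable PseudoMovImm, or a shorter
// two-register (SLLI + ADD/ADD_UW) form when profitable.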
204 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205                          int64_t Imm, const RISCVSubtarget &Subtarget) {
206   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207 
208   // Use a rematerializable pseudo instruction for short sequences if enabled.
209   if (Seq.size() == 2 && UsePseudoMovImm)
210     return SDValue(
211         CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212                                CurDAG->getTargetConstant(Imm, DL, VT)),
213         0);
214 
215   // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216   // worst an LUI+ADDIW. This will require an extra register, but avoids a
217   // constant pool.
218   // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219   // low and high 32 bits are the same and bit 31 and 63 are set.
220   if (Seq.size() > 3) {
221     unsigned ShiftAmt, AddOpc;
222     RISCVMatInt::InstSeq SeqLo =
223         RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224     if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225       SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226 
227       SDValue SLLI = SDValue(
228           CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229                                  CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230           0);
231       return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232     }
233   }
234 
235   // Otherwise, use the original sequence.
236   return selectImmSeq(CurDAG, DL, VT, Seq);
237 }
238 
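// Pack NF vector registers into a single REG_SEQUENCE of the tuple register
// class implied by NF and LMUL, for use as the passthru or store-data operand
// of segment load/store pseudos.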
239 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240                            unsigned NF, RISCVII::VLMUL LMUL) {
241   static const unsigned M1TupleRegClassIDs[] = {
242       RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243       RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244       RISCV::VRN8M1RegClassID};
245   static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246                                                 RISCV::VRN3M2RegClassID,
247                                                 RISCV::VRN4M2RegClassID};
248 
249   assert(Regs.size() >= 2 && Regs.size() <= 8);
250 
251   unsigned RegClassID;
252   unsigned SubReg0;
253   switch (LMUL) {
254   default:
255     llvm_unreachable("Invalid LMUL.");
256   case RISCVII::VLMUL::LMUL_F8:
257   case RISCVII::VLMUL::LMUL_F4:
258   case RISCVII::VLMUL::LMUL_F2:
259   case RISCVII::VLMUL::LMUL_1:
260     static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261                   "Unexpected subreg numbering");
262     SubReg0 = RISCV::sub_vrm1_0;
263     RegClassID = M1TupleRegClassIDs[NF - 2];
264     break;
265   case RISCVII::VLMUL::LMUL_2:
266     static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267                   "Unexpected subreg numbering");
268     SubReg0 = RISCV::sub_vrm2_0;
269     RegClassID = M2TupleRegClassIDs[NF - 2];
270     break;
271   case RISCVII::VLMUL::LMUL_4:
272     static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273                   "Unexpected subreg numbering");
274     SubReg0 = RISCV::sub_vrm4_0;
275     RegClassID = RISCV::VRN2M4RegClassID;
276     break;
277   }
278 
279   SDLoc DL(Regs[0]);
280   SmallVector<SDValue, 8> Ops;
281 
282   Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283 
284   for (unsigned I = 0; I < Regs.size(); ++I) {
285     Ops.push_back(Regs[I]);
286     Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287   }
288   SDNode *N =
289       CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290   return SDValue(N, 0);
291 }
292 
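// Append the operands common to vector load/store pseudos: base pointer,
// optional stride or index, optional mask (copied into V0), VL, SEW, a policy
// operand for loads, the chain, and glue if a mask copy was created.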
293 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294     SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296     bool IsLoad, MVT *IndexVT) {
297   SDValue Chain = Node->getOperand(0);
298   SDValue Glue;
299 
300   Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301 
302   if (IsStridedOrIndexed) {
303     Operands.push_back(Node->getOperand(CurOp++)); // Index.
304     if (IndexVT)
305       *IndexVT = Operands.back()->getSimpleValueType(0);
306   }
307 
308   if (IsMasked) {
309     // Mask needs to be copied to V0.
310     SDValue Mask = Node->getOperand(CurOp++);
311     Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312     Glue = Chain.getValue(1);
313     Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314   }
315   SDValue VL;
316   selectVLOp(Node->getOperand(CurOp++), VL);
317   Operands.push_back(VL);
318 
319   MVT XLenVT = Subtarget->getXLenVT();
320   SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321   Operands.push_back(SEWOp);
322 
323   // At the IR layer, all the masked load intrinsics have policy operands,
324   // none of the others do.  All have passthru operands.  For our pseudos,
325   // all loads have policy operands.
326   if (IsLoad) {
327     uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328     if (IsMasked)
329       Policy = Node->getConstantOperandVal(CurOp++);
330     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331     Operands.push_back(PolicyOp);
332   }
333 
334   Operands.push_back(Chain); // Chain.
335   if (Glue)
336     Operands.push_back(Glue);
337 }
338 
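// Select a unit-stride or strided segment load intrinsic into the pseudo
// returned by getVLSEGPseudo, then split the tuple result back into the
// intrinsic's NF vector results.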
339 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340                                     bool IsStrided) {
341   SDLoc DL(Node);
342   unsigned NF = Node->getNumValues() - 1;
343   MVT VT = Node->getSimpleValueType(0);
344   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346 
347   unsigned CurOp = 2;
348   SmallVector<SDValue, 8> Operands;
349 
350   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351                                Node->op_begin() + CurOp + NF);
352   SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353   Operands.push_back(Merge);
354   CurOp += NF;
355 
356   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357                              Operands, /*IsLoad=*/true);
358 
359   const RISCV::VLSEGPseudo *P =
360       RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361                             static_cast<unsigned>(LMUL));
362   MachineSDNode *Load =
363       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364 
365   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367 
368   SDValue SuperReg = SDValue(Load, 0);
369   for (unsigned I = 0; I < NF; ++I) {
370     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371     ReplaceUses(SDValue(Node, I),
372                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373   }
374 
375   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376   CurDAG->RemoveDeadNode(Node);
377 }
378 
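// Select a fault-only-first segment load intrinsic; the pseudo additionally
// produces the updated VL as a second result.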
379 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380   SDLoc DL(Node);
381   unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382   MVT VT = Node->getSimpleValueType(0);
383   MVT XLenVT = Subtarget->getXLenVT();
384   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386 
387   unsigned CurOp = 2;
388   SmallVector<SDValue, 7> Operands;
389 
390   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391                                Node->op_begin() + CurOp + NF);
392   SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393   Operands.push_back(MaskedOff);
394   CurOp += NF;
395 
396   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397                              /*IsStridedOrIndexed*/ false, Operands,
398                              /*IsLoad=*/true);
399 
400   const RISCV::VLSEGPseudo *P =
401       RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402                             Log2SEW, static_cast<unsigned>(LMUL));
403   MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404                                                XLenVT, MVT::Other, Operands);
405 
406   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408 
409   SDValue SuperReg = SDValue(Load, 0);
410   for (unsigned I = 0; I < NF; ++I) {
411     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412     ReplaceUses(SDValue(Node, I),
413                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414   }
415 
416   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
417   ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418   CurDAG->RemoveDeadNode(Node);
419 }
420 
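// Select an indexed (ordered or unordered) segment load intrinsic via
// getVLXSEGPseudo, rejecting EEW=64 index types on RV32.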
421 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422                                      bool IsOrdered) {
423   SDLoc DL(Node);
424   unsigned NF = Node->getNumValues() - 1;
425   MVT VT = Node->getSimpleValueType(0);
426   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428 
429   unsigned CurOp = 2;
430   SmallVector<SDValue, 8> Operands;
431 
432   SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433                                Node->op_begin() + CurOp + NF);
434   SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435   Operands.push_back(MaskedOff);
436   CurOp += NF;
437 
438   MVT IndexVT;
439   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440                              /*IsStridedOrIndexed*/ true, Operands,
441                              /*IsLoad=*/true, &IndexVT);
442 
443   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
444          "Element count mismatch");
445 
446   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449     report_fatal_error("The V extension does not support EEW=64 for index "
450                        "values when XLEN=32");
451   }
452   const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454       static_cast<unsigned>(IndexLMUL));
455   MachineSDNode *Load =
456       CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457 
458   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459     CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460 
461   SDValue SuperReg = SDValue(Load, 0);
462   for (unsigned I = 0; I < NF; ++I) {
463     unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464     ReplaceUses(SDValue(Node, I),
465                 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466   }
467 
468   ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469   CurDAG->RemoveDeadNode(Node);
470 }
471 
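// Select a unit-stride or strided segment store intrinsic via getVSSEGPseudo;
// the stored values are first packed into a register tuple.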
472 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473                                     bool IsStrided) {
474   SDLoc DL(Node);
475   unsigned NF = Node->getNumOperands() - 4;
476   if (IsStrided)
477     NF--;
478   if (IsMasked)
479     NF--;
480   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485 
486   SmallVector<SDValue, 8> Operands;
487   Operands.push_back(StoreVal);
488   unsigned CurOp = 2 + NF;
489 
490   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491                              Operands);
492 
493   const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494       NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495   MachineSDNode *Store =
496       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497 
498   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500 
501   ReplaceNode(Node, Store);
502 }
503 
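// Select an indexed (ordered or unordered) segment store intrinsic via
// getVSXSEGPseudo.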
504 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505                                      bool IsOrdered) {
506   SDLoc DL(Node);
507   unsigned NF = Node->getNumOperands() - 5;
508   if (IsMasked)
509     --NF;
510   MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511   unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512   RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513   SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514   SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515 
516   SmallVector<SDValue, 8> Operands;
517   Operands.push_back(StoreVal);
518   unsigned CurOp = 2 + NF;
519 
520   MVT IndexVT;
521   addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522                              /*IsStridedOrIndexed*/ true, Operands,
523                              /*IsLoad=*/false, &IndexVT);
524 
525   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
526          "Element count mismatch");
527 
528   RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529   unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530   if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531     report_fatal_error("The V extension does not support EEW=64 for index "
532                        "values when XLEN=32");
533   }
534   const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535       NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536       static_cast<unsigned>(IndexLMUL));
537   MachineSDNode *Store =
538       CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539 
540   if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541     CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542 
543   ReplaceNode(Node, Store);
544 }
545 
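// Select the riscv_vsetvli / riscv_vsetvlimax intrinsics into PseudoVSETVLI,
// PseudoVSETVLIX0 (VLMAX), or PseudoVSETIVLI (small constant AVL).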
546 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547   if (!Subtarget->hasVInstructions())
548     return;
549 
550   assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551 
552   SDLoc DL(Node);
553   MVT XLenVT = Subtarget->getXLenVT();
554 
555   unsigned IntNo = Node->getConstantOperandVal(0);
556 
557   assert((IntNo == Intrinsic::riscv_vsetvli ||
558           IntNo == Intrinsic::riscv_vsetvlimax) &&
559          "Unexpected vsetvli intrinsic");
560 
561   bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562   unsigned Offset = (VLMax ? 1 : 2);
563 
564   assert(Node->getNumOperands() == Offset + 2 &&
565          "Unexpected number of operands");
566 
567   unsigned SEW =
568       RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569   RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570       Node->getConstantOperandVal(Offset + 1) & 0x7);
571 
572   unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573                                             /*MaskAgnostic*/ true);
574   SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575 
576   SDValue VLOperand;
577   unsigned Opcode = RISCV::PseudoVSETVLI;
578   if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579     if (auto VLEN = Subtarget->getRealVLen())
580       if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581         VLMax = true;
582   }
583   if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584     VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585     Opcode = RISCV::PseudoVSETVLIX0;
586   } else {
587     VLOperand = Node->getOperand(1);
588 
589     if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590       uint64_t AVL = C->getZExtValue();
591       if (isUInt<5>(AVL)) {
592         SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593         ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594                                                  XLenVT, VLImm, VTypeIOp));
595         return;
596       }
597     }
598   }
599 
600   ReplaceNode(Node,
601               CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602 }
603 
604 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605   MVT VT = Node->getSimpleValueType(0);
606   unsigned Opcode = Node->getOpcode();
607   assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608          "Unexpected opcode");
609   SDLoc DL(Node);
610 
611   // For operations of the form (x << C1) op C2, check if we can use
612   // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
613   SDValue N0 = Node->getOperand(0);
614   SDValue N1 = Node->getOperand(1);
615 
616   ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617   if (!Cst)
618     return false;
619 
620   int64_t Val = Cst->getSExtValue();
621 
622   // Check if immediate can already use ANDI/ORI/XORI.
623   if (isInt<12>(Val))
624     return false;
625 
626   SDValue Shift = N0;
627 
628   // If Val is simm32 and we have a sext_inreg from i32, then the binop
629   // produces at least 33 sign bits. We can peek through the sext_inreg and use
630   // a SLLIW at the end.
631   bool SignExt = false;
632   if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633       N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634     SignExt = true;
635     Shift = N0.getOperand(0);
636   }
637 
638   if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639     return false;
640 
641   ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642   if (!ShlCst)
643     return false;
644 
645   uint64_t ShAmt = ShlCst->getZExtValue();
646 
647   // Make sure that we don't change the operation by removing bits.
648   // This only matters for OR and XOR, AND is unaffected.
649   uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650   if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651     return false;
652 
653   int64_t ShiftedVal = Val >> ShAmt;
654   if (!isInt<12>(ShiftedVal))
655     return false;
656 
657   // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658   if (SignExt && ShAmt >= 32)
659     return false;
660 
661   // Ok, we can reorder to get a smaller immediate.
662   unsigned BinOpc;
663   switch (Opcode) {
664   default: llvm_unreachable("Unexpected opcode");
665   case ISD::AND: BinOpc = RISCV::ANDI; break;
666   case ISD::OR:  BinOpc = RISCV::ORI;  break;
667   case ISD::XOR: BinOpc = RISCV::XORI; break;
668   }
669 
670   unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671 
672   SDNode *BinOp =
673       CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674                              CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675   SDNode *SLLI =
676       CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677                              CurDAG->getTargetConstant(ShAmt, DL, VT));
678   ReplaceNode(Node, SLLI);
679   return true;
680 }
681 
682 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683   // Only supported with XTHeadBb at the moment.
684   if (!Subtarget->hasVendorXTHeadBb())
685     return false;
686 
687   auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688   if (!N1C)
689     return false;
690 
691   SDValue N0 = Node->getOperand(0);
692   if (!N0.hasOneUse())
693     return false;
694 
695   auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696                              MVT VT) {
697     return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698                                   CurDAG->getTargetConstant(Msb, DL, VT),
699                                   CurDAG->getTargetConstant(Lsb, DL, VT));
700   };
701 
702   SDLoc DL(Node);
703   MVT VT = Node->getSimpleValueType(0);
704   const unsigned RightShAmt = N1C->getZExtValue();
705 
706   // Transform (sra (shl X, C1), C2) with C1 <= C2
707   //        -> (TH.EXT X, msb, lsb)
708   if (N0.getOpcode() == ISD::SHL) {
709     auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710     if (!N01C)
711       return false;
712 
713     const unsigned LeftShAmt = N01C->getZExtValue();
714     // Make sure that this is a bitfield extraction (i.e., the shift-right
715     // amount can not be less than the left-shift).
716     if (LeftShAmt > RightShAmt)
717       return false;
718 
719     const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720     const unsigned Msb = MsbPlusOne - 1;
721     const unsigned Lsb = RightShAmt - LeftShAmt;
722 
723     SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724     ReplaceNode(Node, TH_EXT);
725     return true;
726   }
727 
728   // Transform (sra (sext_inreg X, _), C) ->
729   //           (TH.EXT X, msb, lsb)
730   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731     unsigned ExtSize =
732         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733 
734     // ExtSize of 32 should use sraiw via tablegen pattern.
735     if (ExtSize == 32)
736       return false;
737 
738     const unsigned Msb = ExtSize - 1;
739     const unsigned Lsb = RightShAmt;
740 
741     SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742     ReplaceNode(Node, TH_EXT);
743     return true;
744   }
745 
746   return false;
747 }
748 
749 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750   // Target does not support indexed loads.
751   if (!Subtarget->hasVendorXTHeadMemIdx())
752     return false;
753 
754   LoadSDNode *Ld = cast<LoadSDNode>(Node);
755   ISD::MemIndexedMode AM = Ld->getAddressingMode();
756   if (AM == ISD::UNINDEXED)
757     return false;
758 
759   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760   if (!C)
761     return false;
762 
763   EVT LoadVT = Ld->getMemoryVT();
764   assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765          "Unexpected addressing mode");
766   bool IsPre = AM == ISD::PRE_INC;
767   bool IsPost = AM == ISD::POST_INC;
768   int64_t Offset = C->getSExtValue();
769 
770   // The constants that can be encoded in the THeadMemIdx instructions
771   // are of the form (sign_extend(imm5) << imm2).
772   int64_t Shift;
773   for (Shift = 0; Shift < 4; Shift++)
774     if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775       break;
776 
777   // Constant cannot be encoded.
778   if (Shift == 4)
779     return false;
780 
781   bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782   unsigned Opcode;
783   if (LoadVT == MVT::i8 && IsPre)
784     Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785   else if (LoadVT == MVT::i8 && IsPost)
786     Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787   else if (LoadVT == MVT::i16 && IsPre)
788     Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789   else if (LoadVT == MVT::i16 && IsPost)
790     Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791   else if (LoadVT == MVT::i32 && IsPre)
792     Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793   else if (LoadVT == MVT::i32 && IsPost)
794     Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795   else if (LoadVT == MVT::i64 && IsPre)
796     Opcode = RISCV::TH_LDIB;
797   else if (LoadVT == MVT::i64 && IsPost)
798     Opcode = RISCV::TH_LDIA;
799   else
800     return false;
801 
802   EVT Ty = Ld->getOffset().getValueType();
803   SDValue Ops[] = {Ld->getBasePtr(),
804                    CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805                    CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806                    Ld->getChain()};
807   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808                                        Ld->getValueType(1), MVT::Other, Ops);
809 
810   MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811   CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812 
813   ReplaceNode(Node, New);
814 
815   return true;
816 }
817 
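// Select the SiFive sf.vc.x.se / sf.vc.i.se intrinsics into the LMUL-specific
// PseudoVC_X_SE_* / PseudoVC_I_SE_* instructions.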
818 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819   if (!Subtarget->hasVInstructions())
820     return;
821 
822   assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823 
824   SDLoc DL(Node);
825   unsigned IntNo = Node->getConstantOperandVal(1);
826 
827   assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828           IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829          "Unexpected vsetvli intrinsic");
830 
831   // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832   unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833   SDValue SEWOp =
834       CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835   SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836                                       Node->getOperand(4), Node->getOperand(5),
837                                       Node->getOperand(8), SEWOp,
838                                       Node->getOperand(0)};
839 
840   unsigned Opcode;
841   auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842   switch (LMulSDNode->getSExtValue()) {
843   case 5:
844     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845                                                   : RISCV::PseudoVC_I_SE_MF8;
846     break;
847   case 6:
848     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849                                                   : RISCV::PseudoVC_I_SE_MF4;
850     break;
851   case 7:
852     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853                                                   : RISCV::PseudoVC_I_SE_MF2;
854     break;
855   case 0:
856     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857                                                   : RISCV::PseudoVC_I_SE_M1;
858     break;
859   case 1:
860     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861                                                   : RISCV::PseudoVC_I_SE_M2;
862     break;
863   case 2:
864     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865                                                   : RISCV::PseudoVC_I_SE_M4;
866     break;
867   case 3:
868     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869                                                   : RISCV::PseudoVC_I_SE_M8;
870     break;
871   }
872 
873   ReplaceNode(Node, CurDAG->getMachineNode(
874                         Opcode, DL, Node->getSimpleValueType(0), Operands));
875 }
876 
877 void RISCVDAGToDAGISel::Select(SDNode *Node) {
878   // If we have a custom node, we have already selected.
879   if (Node->isMachineOpcode()) {
880     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881     Node->setNodeId(-1);
882     return;
883   }
884 
885   // Instruction Selection not handled by the auto-generated tablegen selection
886   // should be handled here.
887   unsigned Opcode = Node->getOpcode();
888   MVT XLenVT = Subtarget->getXLenVT();
889   SDLoc DL(Node);
890   MVT VT = Node->getSimpleValueType(0);
891 
892   bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893 
894   switch (Opcode) {
895   case ISD::Constant: {
896     assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897     auto *ConstNode = cast<ConstantSDNode>(Node);
898     if (ConstNode->isZero()) {
899       SDValue New =
900           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901       ReplaceNode(Node, New.getNode());
902       return;
903     }
904     int64_t Imm = ConstNode->getSExtValue();
905     // If only the lower 8 bits are used, try to convert this to a simm6 by
906     // sign-extending bit 7. This is neutral without the C extension, and
907     // allows C.LI to be used if C is present.
908     if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
909       Imm = SignExtend64<8>(Imm);
910     // If the upper XLen-16 bits are not used, try to convert this to a simm12
911     // by sign extending bit 15.
912     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
913         hasAllHUsers(Node))
914       Imm = SignExtend64<16>(Imm);
915     // If the upper 32 bits are not used, try to convert this into a simm32 by
916     // sign extending bit 31.
917     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
918       Imm = SignExtend64<32>(Imm);
919 
920     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
921     return;
922   }
923   case ISD::ConstantFP: {
924     const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
925     auto [FPImm, NeedsFNeg] =
926         static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
927                                                                         VT);
928     if (FPImm >= 0) {
929       unsigned Opc;
930       unsigned FNegOpc;
931       switch (VT.SimpleTy) {
932       default:
933         llvm_unreachable("Unexpected size");
934       case MVT::f16:
935         Opc = RISCV::FLI_H;
936         FNegOpc = RISCV::FSGNJN_H;
937         break;
938       case MVT::f32:
939         Opc = RISCV::FLI_S;
940         FNegOpc = RISCV::FSGNJN_S;
941         break;
942       case MVT::f64:
943         Opc = RISCV::FLI_D;
944         FNegOpc = RISCV::FSGNJN_D;
945         break;
946       }
947       SDNode *Res = CurDAG->getMachineNode(
948           Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
949       if (NeedsFNeg)
950         Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
951                                      SDValue(Res, 0));
952 
953       ReplaceNode(Node, Res);
954       return;
955     }
956 
957     bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
958     SDValue Imm;
959     // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
960     // create an integer immediate.
961     if (APF.isPosZero() || NegZeroF64)
962       Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
963     else
964       Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
965                       *Subtarget);
966 
967     bool HasZdinx = Subtarget->hasStdExtZdinx();
968     bool Is64Bit = Subtarget->is64Bit();
969     unsigned Opc;
970     switch (VT.SimpleTy) {
971     default:
972       llvm_unreachable("Unexpected size");
973     case MVT::bf16:
974       assert(Subtarget->hasStdExtZfbfmin());
975       Opc = RISCV::FMV_H_X;
976       break;
977     case MVT::f16:
978       Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
979       break;
980     case MVT::f32:
981       Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
982       break;
983     case MVT::f64:
984       // For RV32, we can't move from a GPR, we need to convert instead. This
985       // should only happen for +0.0 and -0.0.
986       assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
987       if (Is64Bit)
988         Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
989       else
990         Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
991       break;
992     }
993 
994     SDNode *Res;
995     if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
996       Res = CurDAG->getMachineNode(
997           Opc, DL, VT, Imm,
998           CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
999     else
1000       Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1001 
1002     // For f64 -0.0, we need to insert a fneg.d idiom.
1003     if (NegZeroF64) {
1004       Opc = RISCV::FSGNJN_D;
1005       if (HasZdinx)
1006         Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1007       Res =
1008           CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1009     }
1010 
1011     ReplaceNode(Node, Res);
1012     return;
1013   }
1014   case RISCVISD::BuildPairF64: {
1015     if (!Subtarget->hasStdExtZdinx())
1016       break;
1017 
1018     assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1019 
1020     SDValue Ops[] = {
1021         CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1022         Node->getOperand(0),
1023         CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1024         Node->getOperand(1),
1025         CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1026 
1027     SDNode *N =
1028         CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1029     ReplaceNode(Node, N);
1030     return;
1031   }
1032   case RISCVISD::SplitF64: {
1033     if (Subtarget->hasStdExtZdinx()) {
1034       assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1035 
1036       if (!SDValue(Node, 0).use_empty()) {
1037         SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1038                                                     Node->getOperand(0));
1039         ReplaceUses(SDValue(Node, 0), Lo);
1040       }
1041 
1042       if (!SDValue(Node, 1).use_empty()) {
1043         SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1044                                                     Node->getOperand(0));
1045         ReplaceUses(SDValue(Node, 1), Hi);
1046       }
1047 
1048       CurDAG->RemoveDeadNode(Node);
1049       return;
1050     }
1051 
1052     if (!Subtarget->hasStdExtZfa())
1053       break;
1054     assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1055            "Unexpected subtarget");
1056 
1057     // With Zfa, lower to fmv.x.w and fmvh.x.d.
1058     if (!SDValue(Node, 0).use_empty()) {
1059       SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1060                                           Node->getOperand(0));
1061       ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1062     }
1063     if (!SDValue(Node, 1).use_empty()) {
1064       SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1065                                           Node->getOperand(0));
1066       ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1067     }
1068 
1069     CurDAG->RemoveDeadNode(Node);
1070     return;
1071   }
1072   case ISD::SHL: {
1073     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1074     if (!N1C)
1075       break;
1076     SDValue N0 = Node->getOperand(0);
1077     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1078         !isa<ConstantSDNode>(N0.getOperand(1)))
1079       break;
1080     unsigned ShAmt = N1C->getZExtValue();
1081     uint64_t Mask = N0.getConstantOperandVal(1);
1082 
1083     // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1084     // 32 leading zeros and C3 trailing zeros.
1085     if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1086       unsigned XLen = Subtarget->getXLen();
1087       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1088       unsigned TrailingZeros = llvm::countr_zero(Mask);
1089       if (TrailingZeros > 0 && LeadingZeros == 32) {
1090         SDNode *SRLIW = CurDAG->getMachineNode(
1091             RISCV::SRLIW, DL, VT, N0->getOperand(0),
1092             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1093         SDNode *SLLI = CurDAG->getMachineNode(
1094             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1095             CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1096         ReplaceNode(Node, SLLI);
1097         return;
1098       }
1099     }
1100     break;
1101   }
1102   case ISD::SRL: {
1103     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1104     if (!N1C)
1105       break;
1106     SDValue N0 = Node->getOperand(0);
1107     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1108       break;
1109     unsigned ShAmt = N1C->getZExtValue();
1110     uint64_t Mask = N0.getConstantOperandVal(1);
1111 
1112     // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1113     // 32 leading zeros and C3 trailing zeros.
1114     if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1115       unsigned XLen = Subtarget->getXLen();
1116       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1117       unsigned TrailingZeros = llvm::countr_zero(Mask);
1118       if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1119         SDNode *SRLIW = CurDAG->getMachineNode(
1120             RISCV::SRLIW, DL, VT, N0->getOperand(0),
1121             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1122         SDNode *SLLI = CurDAG->getMachineNode(
1123             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1124             CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1125         ReplaceNode(Node, SLLI);
1126         return;
1127       }
1128     }
1129 
1130     // Optimize (srl (and X, C2), C) ->
1131     //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
1132     // Where C2 is a mask with C3 trailing ones.
1133     // Taking into account that the C2 may have had lower bits unset by
1134     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1135     // This pattern occurs when type legalizing right shifts for types with
1136     // less than XLen bits.
1137     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1138     if (!isMask_64(Mask))
1139       break;
1140     unsigned TrailingOnes = llvm::countr_one(Mask);
1141     if (ShAmt >= TrailingOnes)
1142       break;
1143     // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1144     if (TrailingOnes == 32) {
1145       SDNode *SRLI = CurDAG->getMachineNode(
1146           Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1147           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1148       ReplaceNode(Node, SRLI);
1149       return;
1150     }
1151 
1152     // Only do the remaining transforms if the AND has one use.
1153     if (!N0.hasOneUse())
1154       break;
1155 
1156     // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1157     if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1158       SDNode *BEXTI = CurDAG->getMachineNode(
1159           Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1160           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1161       ReplaceNode(Node, BEXTI);
1162       return;
1163     }
1164 
1165     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1166     SDNode *SLLI =
1167         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1168                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1169     SDNode *SRLI = CurDAG->getMachineNode(
1170         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1171         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1172     ReplaceNode(Node, SRLI);
1173     return;
1174   }
1175   case ISD::SRA: {
1176     if (trySignedBitfieldExtract(Node))
1177       return;
1178 
1179     // Optimize (sra (sext_inreg X, i16), C) ->
1180     //          (srai (slli X, (XLen-16)), (XLen-16) + C)
1181     // And      (sra (sext_inreg X, i8), C) ->
1182     //          (srai (slli X, (XLen-8)), (XLen-8) + C)
1183     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1184     // This transform matches the code we get without Zbb. The shifts are more
1185     // compressible, and this can help expose CSE opportunities in the sdiv by
1186     // constant optimization.
1187     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1188     if (!N1C)
1189       break;
1190     SDValue N0 = Node->getOperand(0);
1191     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1192       break;
1193     unsigned ShAmt = N1C->getZExtValue();
1194     unsigned ExtSize =
1195         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1196     // ExtSize of 32 should use sraiw via tablegen pattern.
1197     if (ExtSize >= 32 || ShAmt >= ExtSize)
1198       break;
1199     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1200     SDNode *SLLI =
1201         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1202                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1203     SDNode *SRAI = CurDAG->getMachineNode(
1204         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1205         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1206     ReplaceNode(Node, SRAI);
1207     return;
1208   }
1209   case ISD::OR:
1210   case ISD::XOR:
1211     if (tryShrinkShlLogicImm(Node))
1212       return;
1213 
1214     break;
1215   case ISD::AND: {
1216     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1217     if (!N1C)
1218       break;
1219     uint64_t C1 = N1C->getZExtValue();
1220     const bool isC1Mask = isMask_64(C1);
1221     const bool isC1ANDI = isInt<12>(C1);
1222 
1223     SDValue N0 = Node->getOperand(0);
1224 
1225     auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226                                           SDValue X, unsigned Msb,
1227                                           unsigned Lsb) {
1228       if (!Subtarget->hasVendorXTHeadBb())
1229         return false;
1230 
1231       SDNode *TH_EXTU = CurDAG->getMachineNode(
1232           RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1233           CurDAG->getTargetConstant(Lsb, DL, VT));
1234       ReplaceNode(Node, TH_EXTU);
1235       return true;
1236     };
1237 
1238     bool LeftShift = N0.getOpcode() == ISD::SHL;
1239     if (LeftShift || N0.getOpcode() == ISD::SRL) {
1240       auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1241       if (!C)
1242         break;
1243       unsigned C2 = C->getZExtValue();
1244       unsigned XLen = Subtarget->getXLen();
1245       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1246 
1247       // Keep track of whether this is a c.andi. If we can't use c.andi, the
1248       // shift pair might offer more compression opportunities.
1249       // TODO: We could check for C extension here, but we don't have many lit
1250       // tests with the C extension enabled so not checking gets better
1251       // coverage.
1252       // TODO: What if ANDI faster than shift?
1253       bool IsCANDI = isInt<6>(N1C->getSExtValue());
1254 
1255       // Clear irrelevant bits in the mask.
1256       if (LeftShift)
1257         C1 &= maskTrailingZeros<uint64_t>(C2);
1258       else
1259         C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260 
1261       // Some transforms should only be done if the shift has a single use or
1262       // the AND would become (srli (slli X, 32), 32)
1263       bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264 
1265       SDValue X = N0.getOperand(0);
1266 
1267       // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268       // with c3 leading zeros.
1269       if (!LeftShift && isC1Mask) {
1270         unsigned Leading = XLen - llvm::bit_width(C1);
1271         if (C2 < Leading) {
1272           // If the number of leading zeros is C2+32 this can be SRLIW.
1273           if (C2 + 32 == Leading) {
1274             SDNode *SRLIW = CurDAG->getMachineNode(
1275                 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276             ReplaceNode(Node, SRLIW);
1277             return;
1278           }
1279 
1280           // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281           // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282           //
1283           // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284           // legalized and goes through DAG combine.
1285           if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286               X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287               cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288             SDNode *SRAIW =
1289                 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290                                        CurDAG->getTargetConstant(31, DL, VT));
1291             SDNode *SRLIW = CurDAG->getMachineNode(
1292                 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293                 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294             ReplaceNode(Node, SRLIW);
1295             return;
1296           }
1297 
1298           // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299           // available.
1300           // Transform (and (srl x, C2), C1)
1301           //        -> (<bfextract> x, msb, lsb)
1302           //
1303           // Make sure to keep this below the SRLIW cases, as we always want to
1304           // prefer the more common instruction.
1305           const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306           const unsigned Lsb = C2;
1307           if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308             return;
1309 
1310           // (srli (slli x, c3-c2), c3).
1311           // Skip if we could use (zext.w (sraiw X, C2)).
1312           bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313                       X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314                       cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315           // Also Skip if we can use bexti or th.tst.
1316           Skip |= HasBitTest && Leading == XLen - 1;
1317           if (OneUseOrZExtW && !Skip) {
1318             SDNode *SLLI = CurDAG->getMachineNode(
1319                 RISCV::SLLI, DL, VT, X,
1320                 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321             SDNode *SRLI = CurDAG->getMachineNode(
1322                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323                 CurDAG->getTargetConstant(Leading, DL, VT));
1324             ReplaceNode(Node, SRLI);
1325             return;
1326           }
1327         }
1328       }
1329 
1330       // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
1331       // shifted by c2 bits with c3 leading zeros.
1332       if (LeftShift && isShiftedMask_64(C1)) {
1333         unsigned Leading = XLen - llvm::bit_width(C1);
1334 
1335         if (C2 + Leading < XLen &&
1336             C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337           // Use slli.uw when possible.
1338           if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339             SDNode *SLLI_UW =
1340                 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341                                        CurDAG->getTargetConstant(C2, DL, VT));
1342             ReplaceNode(Node, SLLI_UW);
1343             return;
1344           }
1345 
1346           // (srli (slli x, c2+c3), c3)
1347           if (OneUseOrZExtW && !IsCANDI) {
1348             SDNode *SLLI = CurDAG->getMachineNode(
1349                 RISCV::SLLI, DL, VT, X,
1350                 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351             SDNode *SRLI = CurDAG->getMachineNode(
1352                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353                 CurDAG->getTargetConstant(Leading, DL, VT));
1354             ReplaceNode(Node, SRLI);
1355             return;
1356           }
1357         }
1358       }
1359 
1360       // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361       // shifted mask with c2 leading zeros and c3 trailing zeros.
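           // For example, with XLen = 64, c2 = 16 and
           // c1 = 0x0000ffffffffff00 (c2 = 16 leading zeros, c3 = 8 trailing
           // zeros), this becomes (slli (srli x, 24), 8).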
1362       if (!LeftShift && isShiftedMask_64(C1)) {
1363         unsigned Leading = XLen - llvm::bit_width(C1);
1364         unsigned Trailing = llvm::countr_zero(C1);
1365         if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366             !IsCANDI) {
1367           unsigned SrliOpc = RISCV::SRLI;
1368           // If the input is zexti32 we should use SRLIW.
1369           if (X.getOpcode() == ISD::AND &&
1370               isa<ConstantSDNode>(X.getOperand(1)) &&
1371               X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372             SrliOpc = RISCV::SRLIW;
1373             X = X.getOperand(0);
1374           }
1375           SDNode *SRLI = CurDAG->getMachineNode(
1376               SrliOpc, DL, VT, X,
1377               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378           SDNode *SLLI = CurDAG->getMachineNode(
1379               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380               CurDAG->getTargetConstant(Trailing, DL, VT));
1381           ReplaceNode(Node, SLLI);
1382           return;
1383         }
1384         // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385         if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386             OneUseOrZExtW && !IsCANDI) {
1387           SDNode *SRLIW = CurDAG->getMachineNode(
1388               RISCV::SRLIW, DL, VT, X,
1389               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390           SDNode *SLLI = CurDAG->getMachineNode(
1391               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392               CurDAG->getTargetConstant(Trailing, DL, VT));
1393           ReplaceNode(Node, SLLI);
1394           return;
1395         }
1396       }
1397 
1398       // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1399       // shifted mask with no leading zeros and c3 trailing zeros.
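           // For example, with XLen = 64, c2 = 4 and
           // c1 = 0xffffffffffff0000 (no leading zeros, c3 = 16 trailing
           // zeros), this becomes (slli (srli x, 12), 16).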
1400       if (LeftShift && isShiftedMask_64(C1)) {
1401         unsigned Leading = XLen - llvm::bit_width(C1);
1402         unsigned Trailing = llvm::countr_zero(C1);
1403         if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1404           SDNode *SRLI = CurDAG->getMachineNode(
1405               RISCV::SRLI, DL, VT, X,
1406               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1407           SDNode *SLLI = CurDAG->getMachineNode(
1408               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1409               CurDAG->getTargetConstant(Trailing, DL, VT));
1410           ReplaceNode(Node, SLLI);
1411           return;
1412         }
1413         // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1414         if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1415           SDNode *SRLIW = CurDAG->getMachineNode(
1416               RISCV::SRLIW, DL, VT, X,
1417               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1418           SDNode *SLLI = CurDAG->getMachineNode(
1419               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1420               CurDAG->getTargetConstant(Trailing, DL, VT));
1421           ReplaceNode(Node, SLLI);
1422           return;
1423         }
1424 
1425         // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1426         if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1427             Subtarget->hasStdExtZba()) {
1428           SDNode *SRLI = CurDAG->getMachineNode(
1429               RISCV::SRLI, DL, VT, X,
1430               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1431           SDNode *SLLI_UW = CurDAG->getMachineNode(
1432               RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1433               CurDAG->getTargetConstant(Trailing, DL, VT));
1434           ReplaceNode(Node, SLLI_UW);
1435           return;
1436         }
1437       }
1438     }
1439 
1440     // If C1 masks off the upper bits only (but can't be formed as an
1441     // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1442     // available.
1443     // Transform (and x, C1)
1444     //        -> (<bfextract> x, msb, lsb)
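         // For example, C1 = 0x3ffff (18 ones) is a simple mask but does not
         // fit in an ANDI immediate; it extracts bits [17:0], so Msb = 17.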
1445     if (isC1Mask && !isC1ANDI) {
1446       const unsigned Msb = llvm::bit_width(C1) - 1;
1447       if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1448         return;
1449     }
1450 
1451     if (tryShrinkShlLogicImm(Node))
1452       return;
1453 
1454     break;
1455   }
1456   case ISD::MUL: {
1457     // Special case for calculating (mul (and X, C2), C1) where the full product
1458     // fits in XLen bits. We can shift X left by the number of leading zeros in
1459     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1460     // product has XLen trailing zeros, putting it in the output of MULHU. This
1461     // can avoid materializing a constant in a register for C2.
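         // For example, on RV64 with C2 = 0xfff (52 leading zeros) and
         // C1 = 5, (mul (and X, 0xfff), 5) becomes
         // (mulhu (slli X, 52), 0x5000): the 128-bit product is
         // ((X & 0xfff) * 5) << 64, so MULHU returns exactly the desired
         // value and the 0xfff mask never has to be materialized.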
1462 
1463     // RHS should be a constant.
1464     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1465     if (!N1C || !N1C->hasOneUse())
1466       break;
1467 
1468     // LHS should be an AND with constant.
1469     SDValue N0 = Node->getOperand(0);
1470     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1471       break;
1472 
1473     uint64_t C2 = N0.getConstantOperandVal(1);
1474 
1475     // Constant should be a mask.
1476     if (!isMask_64(C2))
1477       break;
1478 
1479     // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1480     // multiple users or the constant is a simm12. This prevents inserting a
1481     // multiple users or the constant is a simm12. This prevents inserting a
1482     // shift while still having uses of the AND/ZEXT. Shifting a simm12 will likely
1483     // it to be compressed.
1484     bool IsANDIOrZExt =
1485         isInt<12>(C2) ||
1486         (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1487     // With XTHeadBb, we can use TH.EXTU.
1488     IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1489     if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1490       break;
1491     // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1492     // the constant is a simm32.
1493     bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1494     // With XTHeadBb, we can use TH.EXTU.
1495     IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1496     if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1497       break;
1498 
1499     // We need to shift left the AND input and C1 by a total of XLen bits.
1500 
1501     // How far left do we need to shift the AND input?
1502     unsigned XLen = Subtarget->getXLen();
1503     unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1504 
1505     // The constant gets shifted by the remaining amount unless that would
1506     // shift bits out.
1507     uint64_t C1 = N1C->getZExtValue();
1508     unsigned ConstantShift = XLen - LeadingZeros;
1509     if (ConstantShift > (XLen - llvm::bit_width(C1)))
1510       break;
1511 
1512     uint64_t ShiftedC1 = C1 << ConstantShift;
1513     // If this is RV32, we need to sign extend the constant.
1514     if (XLen == 32)
1515       ShiftedC1 = SignExtend64<32>(ShiftedC1);
1516 
1517     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1518     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1519     SDNode *SLLI =
1520         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1521                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1522     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1523                                            SDValue(SLLI, 0), SDValue(Imm, 0));
1524     ReplaceNode(Node, MULHU);
1525     return;
1526   }
1527   case ISD::LOAD: {
1528     if (tryIndexedLoad(Node))
1529       return;
1530 
1531     if (Subtarget->hasVendorXCVmem()) {
1532       // We match post-incrementing loads here
1533       LoadSDNode *Load = cast<LoadSDNode>(Node);
1534       if (Load->getAddressingMode() != ISD::POST_INC)
1535         break;
1536 
1537       SDValue Chain = Node->getOperand(0);
1538       SDValue Base = Node->getOperand(1);
1539       SDValue Offset = Node->getOperand(2);
1540 
1541       bool Simm12 = false;
1542       bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1543 
1544       if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1545         int ConstantVal = ConstantOffset->getSExtValue();
1546         Simm12 = isInt<12>(ConstantVal);
1547         if (Simm12)
1548           Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1549                                              Offset.getValueType());
1550       }
1551 
1552       unsigned Opcode = 0;
1553       switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1554       case MVT::i8:
1555         if (Simm12 && SignExtend)
1556           Opcode = RISCV::CV_LB_ri_inc;
1557         else if (Simm12 && !SignExtend)
1558           Opcode = RISCV::CV_LBU_ri_inc;
1559         else if (!Simm12 && SignExtend)
1560           Opcode = RISCV::CV_LB_rr_inc;
1561         else
1562           Opcode = RISCV::CV_LBU_rr_inc;
1563         break;
1564       case MVT::i16:
1565         if (Simm12 && SignExtend)
1566           Opcode = RISCV::CV_LH_ri_inc;
1567         else if (Simm12 && !SignExtend)
1568           Opcode = RISCV::CV_LHU_ri_inc;
1569         else if (!Simm12 && SignExtend)
1570           Opcode = RISCV::CV_LH_rr_inc;
1571         else
1572           Opcode = RISCV::CV_LHU_rr_inc;
1573         break;
1574       case MVT::i32:
1575         if (Simm12)
1576           Opcode = RISCV::CV_LW_ri_inc;
1577         else
1578           Opcode = RISCV::CV_LW_rr_inc;
1579         break;
1580       default:
1581         break;
1582       }
1583       if (!Opcode)
1584         break;
1585 
1586       ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1587                                                Chain.getSimpleValueType(), Base,
1588                                                Offset, Chain));
1589       return;
1590     }
1591     break;
1592   }
1593   case ISD::INTRINSIC_WO_CHAIN: {
1594     unsigned IntNo = Node->getConstantOperandVal(0);
1595     switch (IntNo) {
1596       // By default we do not custom select any intrinsic.
1597     default:
1598       break;
1599     case Intrinsic::riscv_vmsgeu:
1600     case Intrinsic::riscv_vmsge: {
1601       SDValue Src1 = Node->getOperand(1);
1602       SDValue Src2 = Node->getOperand(2);
1603       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1604       bool IsCmpUnsignedZero = false;
1605       // Only custom select scalar second operand.
1606       if (Src2.getValueType() != XLenVT)
1607         break;
1608       // Small constants are handled with patterns.
1609       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1610         int64_t CVal = C->getSExtValue();
1611         if (CVal >= -15 && CVal <= 16) {
1612           if (!IsUnsigned || CVal != 0)
1613             break;
1614           IsCmpUnsignedZero = true;
1615         }
1616       }
1617       MVT Src1VT = Src1.getSimpleValueType();
1618       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1619       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1620       default:
1621         llvm_unreachable("Unexpected LMUL!");
1622 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
1623   case RISCVII::VLMUL::lmulenum:                                               \
1624     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1625                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1626     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1627     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
1628     break;
1629         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1630         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1631         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1632         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1633         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1634         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1635         CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1636 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1637       }
1638       SDValue SEW = CurDAG->getTargetConstant(
1639           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640       SDValue VL;
1641       selectVLOp(Node->getOperand(3), VL);
1642 
1643       // vmsgeu with a 0 immediate is always true, so expand it to vmset.
1644       if (IsCmpUnsignedZero) {
1645         ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1646         return;
1647       }
1648 
1649       // Expand to
1650       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
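           // (There is no vmsge{u}.vx instruction: va >= x is the negation of
           // va < x, and vmnand.mm of a mask with itself computes its NOT.)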
1651       SDValue Cmp = SDValue(
1652           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1653           0);
1654       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1655                                                {Cmp, Cmp, VL, SEW}));
1656       return;
1657     }
1658     case Intrinsic::riscv_vmsgeu_mask:
1659     case Intrinsic::riscv_vmsge_mask: {
1660       SDValue Src1 = Node->getOperand(2);
1661       SDValue Src2 = Node->getOperand(3);
1662       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1663       bool IsCmpUnsignedZero = false;
1664       // Only custom select scalar second operand.
1665       if (Src2.getValueType() != XLenVT)
1666         break;
1667       // Small constants are handled with patterns.
1668       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1669         int64_t CVal = C->getSExtValue();
1670         if (CVal >= -15 && CVal <= 16) {
1671           if (!IsUnsigned || CVal != 0)
1672             break;
1673           IsCmpUnsignedZero = true;
1674         }
1675       }
1676       MVT Src1VT = Src1.getSimpleValueType();
1677       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1678           VMOROpcode;
1679       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1680       default:
1681         llvm_unreachable("Unexpected LMUL!");
1682 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
1683   case RISCVII::VLMUL::lmulenum:                                               \
1684     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1685                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1686     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1687                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1688     break;
1689         CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1690         CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1691         CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1692         CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1693         CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1694         CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1695         CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1696 #undef CASE_VMSLT_OPCODES
1697       }
1698       // Mask operations use the LMUL from the mask type.
1699       switch (RISCVTargetLowering::getLMUL(VT)) {
1700       default:
1701         llvm_unreachable("Unexpected LMUL!");
1702 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
1703   case RISCVII::VLMUL::lmulenum:                                               \
1704     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
1705     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
1706     VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
1707     break;
1708         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1709         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1710         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1711         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1712         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1713         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1714         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1715 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1716       }
1717       SDValue SEW = CurDAG->getTargetConstant(
1718           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1719       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1720       SDValue VL;
1721       selectVLOp(Node->getOperand(5), VL);
1722       SDValue MaskedOff = Node->getOperand(1);
1723       SDValue Mask = Node->getOperand(4);
1724 
1725       // vmsgeu_mask with a 0 immediate is always true, so expand it to vmor mask, maskedoff.
1726       if (IsCmpUnsignedZero) {
1727         // We don't need vmor if the MaskedOff and the Mask are the same
1728         // value.
1729         if (Mask == MaskedOff) {
1730           ReplaceUses(Node, Mask.getNode());
1731           return;
1732         }
1733         ReplaceNode(Node,
1734                     CurDAG->getMachineNode(VMOROpcode, DL, VT,
1735                                            {Mask, MaskedOff, VL, MaskSEW}));
1736         return;
1737       }
1738 
1739       // If the MaskedOff value and the Mask are the same value use
1740       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
1741       // This avoids needing to copy v0 to vd before starting the next sequence.
1742       if (Mask == MaskedOff) {
1743         SDValue Cmp = SDValue(
1744             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1745             0);
1746         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1747                                                  {Mask, Cmp, VL, MaskSEW}));
1748         return;
1749       }
1750 
1751       // Mask needs to be copied to V0.
1752       SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1753                                            RISCV::V0, Mask, SDValue());
1754       SDValue Glue = Chain.getValue(1);
1755       SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1756 
1757       // Otherwise use
1758       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1759       // The result is mask undisturbed.
1760       // We use the same instructions to emulate mask agnostic behavior, because
1761       // the agnostic result can be either undisturbed or all 1.
1762       SDValue Cmp = SDValue(
1763           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1764                                  {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1765           0);
1766       // vmxor.mm vd, vd, v0 is used to update active value.
1767       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1768                                                {Cmp, Mask, VL, MaskSEW}));
1769       return;
1770     }
1771     case Intrinsic::riscv_vsetvli:
1772     case Intrinsic::riscv_vsetvlimax:
1773       return selectVSETVLI(Node);
1774     }
1775     break;
1776   }
1777   case ISD::INTRINSIC_W_CHAIN: {
1778     unsigned IntNo = Node->getConstantOperandVal(1);
1779     switch (IntNo) {
1780       // By default we do not custom select any intrinsic.
1781     default:
1782       break;
1783     case Intrinsic::riscv_vlseg2:
1784     case Intrinsic::riscv_vlseg3:
1785     case Intrinsic::riscv_vlseg4:
1786     case Intrinsic::riscv_vlseg5:
1787     case Intrinsic::riscv_vlseg6:
1788     case Intrinsic::riscv_vlseg7:
1789     case Intrinsic::riscv_vlseg8: {
1790       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1791       return;
1792     }
1793     case Intrinsic::riscv_vlseg2_mask:
1794     case Intrinsic::riscv_vlseg3_mask:
1795     case Intrinsic::riscv_vlseg4_mask:
1796     case Intrinsic::riscv_vlseg5_mask:
1797     case Intrinsic::riscv_vlseg6_mask:
1798     case Intrinsic::riscv_vlseg7_mask:
1799     case Intrinsic::riscv_vlseg8_mask: {
1800       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1801       return;
1802     }
1803     case Intrinsic::riscv_vlsseg2:
1804     case Intrinsic::riscv_vlsseg3:
1805     case Intrinsic::riscv_vlsseg4:
1806     case Intrinsic::riscv_vlsseg5:
1807     case Intrinsic::riscv_vlsseg6:
1808     case Intrinsic::riscv_vlsseg7:
1809     case Intrinsic::riscv_vlsseg8: {
1810       selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1811       return;
1812     }
1813     case Intrinsic::riscv_vlsseg2_mask:
1814     case Intrinsic::riscv_vlsseg3_mask:
1815     case Intrinsic::riscv_vlsseg4_mask:
1816     case Intrinsic::riscv_vlsseg5_mask:
1817     case Intrinsic::riscv_vlsseg6_mask:
1818     case Intrinsic::riscv_vlsseg7_mask:
1819     case Intrinsic::riscv_vlsseg8_mask: {
1820       selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1821       return;
1822     }
1823     case Intrinsic::riscv_vloxseg2:
1824     case Intrinsic::riscv_vloxseg3:
1825     case Intrinsic::riscv_vloxseg4:
1826     case Intrinsic::riscv_vloxseg5:
1827     case Intrinsic::riscv_vloxseg6:
1828     case Intrinsic::riscv_vloxseg7:
1829     case Intrinsic::riscv_vloxseg8:
1830       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1831       return;
1832     case Intrinsic::riscv_vluxseg2:
1833     case Intrinsic::riscv_vluxseg3:
1834     case Intrinsic::riscv_vluxseg4:
1835     case Intrinsic::riscv_vluxseg5:
1836     case Intrinsic::riscv_vluxseg6:
1837     case Intrinsic::riscv_vluxseg7:
1838     case Intrinsic::riscv_vluxseg8:
1839       selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1840       return;
1841     case Intrinsic::riscv_vloxseg2_mask:
1842     case Intrinsic::riscv_vloxseg3_mask:
1843     case Intrinsic::riscv_vloxseg4_mask:
1844     case Intrinsic::riscv_vloxseg5_mask:
1845     case Intrinsic::riscv_vloxseg6_mask:
1846     case Intrinsic::riscv_vloxseg7_mask:
1847     case Intrinsic::riscv_vloxseg8_mask:
1848       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1849       return;
1850     case Intrinsic::riscv_vluxseg2_mask:
1851     case Intrinsic::riscv_vluxseg3_mask:
1852     case Intrinsic::riscv_vluxseg4_mask:
1853     case Intrinsic::riscv_vluxseg5_mask:
1854     case Intrinsic::riscv_vluxseg6_mask:
1855     case Intrinsic::riscv_vluxseg7_mask:
1856     case Intrinsic::riscv_vluxseg8_mask:
1857       selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1858       return;
1859     case Intrinsic::riscv_vlseg8ff:
1860     case Intrinsic::riscv_vlseg7ff:
1861     case Intrinsic::riscv_vlseg6ff:
1862     case Intrinsic::riscv_vlseg5ff:
1863     case Intrinsic::riscv_vlseg4ff:
1864     case Intrinsic::riscv_vlseg3ff:
1865     case Intrinsic::riscv_vlseg2ff: {
1866       selectVLSEGFF(Node, /*IsMasked*/ false);
1867       return;
1868     }
1869     case Intrinsic::riscv_vlseg8ff_mask:
1870     case Intrinsic::riscv_vlseg7ff_mask:
1871     case Intrinsic::riscv_vlseg6ff_mask:
1872     case Intrinsic::riscv_vlseg5ff_mask:
1873     case Intrinsic::riscv_vlseg4ff_mask:
1874     case Intrinsic::riscv_vlseg3ff_mask:
1875     case Intrinsic::riscv_vlseg2ff_mask: {
1876       selectVLSEGFF(Node, /*IsMasked*/ true);
1877       return;
1878     }
1879     case Intrinsic::riscv_vloxei:
1880     case Intrinsic::riscv_vloxei_mask:
1881     case Intrinsic::riscv_vluxei:
1882     case Intrinsic::riscv_vluxei_mask: {
1883       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1884                       IntNo == Intrinsic::riscv_vluxei_mask;
1885       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1886                        IntNo == Intrinsic::riscv_vloxei_mask;
1887 
1888       MVT VT = Node->getSimpleValueType(0);
1889       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1890 
1891       unsigned CurOp = 2;
1892       SmallVector<SDValue, 8> Operands;
1893       Operands.push_back(Node->getOperand(CurOp++));
1894 
1895       MVT IndexVT;
1896       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1897                                  /*IsStridedOrIndexed*/ true, Operands,
1898                                  /*IsLoad=*/true, &IndexVT);
1899 
1900       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1901              "Element count mismatch");
1902 
1903       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1904       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1905       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1906       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1907         report_fatal_error("The V extension does not support EEW=64 for index "
1908                            "values when XLEN=32");
1909       }
1910       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1911           IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1912           static_cast<unsigned>(IndexLMUL));
1913       MachineSDNode *Load =
1914           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1915 
1916       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1917         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1918 
1919       ReplaceNode(Node, Load);
1920       return;
1921     }
1922     case Intrinsic::riscv_vlm:
1923     case Intrinsic::riscv_vle:
1924     case Intrinsic::riscv_vle_mask:
1925     case Intrinsic::riscv_vlse:
1926     case Intrinsic::riscv_vlse_mask: {
1927       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1928                       IntNo == Intrinsic::riscv_vlse_mask;
1929       bool IsStrided =
1930           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1931 
1932       MVT VT = Node->getSimpleValueType(0);
1933       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934 
1935       // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1936       // operand at the IR level.  In the pseudos, it has both a policy and a
1937       // passthru operand. The passthru operand is needed to track the
1938       // "tail undefined" state, and the policy is there just for
1939       // consistency - it will always be "don't care" for the
1940       // unmasked form.
1941       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1942       unsigned CurOp = 2;
1943       SmallVector<SDValue, 8> Operands;
1944       if (HasPassthruOperand)
1945         Operands.push_back(Node->getOperand(CurOp++));
1946       else {
1947         // We eagerly lower to implicit_def (instead of undef), as we
1948         // otherwise fail to select nodes such as: nxv1i1 = undef
1949         SDNode *Passthru =
1950           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1951         Operands.push_back(SDValue(Passthru, 0));
1952       }
1953       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1954                                  Operands, /*IsLoad=*/true);
1955 
1956       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1957       const RISCV::VLEPseudo *P =
1958           RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1959                               static_cast<unsigned>(LMUL));
1960       MachineSDNode *Load =
1961           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1962 
1963       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1964         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1965 
1966       ReplaceNode(Node, Load);
1967       return;
1968     }
1969     case Intrinsic::riscv_vleff:
1970     case Intrinsic::riscv_vleff_mask: {
1971       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1972 
1973       MVT VT = Node->getSimpleValueType(0);
1974       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1975 
1976       unsigned CurOp = 2;
1977       SmallVector<SDValue, 7> Operands;
1978       Operands.push_back(Node->getOperand(CurOp++));
1979       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1980                                  /*IsStridedOrIndexed*/ false, Operands,
1981                                  /*IsLoad=*/true);
1982 
1983       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1984       const RISCV::VLEPseudo *P =
1985           RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1986                               Log2SEW, static_cast<unsigned>(LMUL));
1987       MachineSDNode *Load = CurDAG->getMachineNode(
1988           P->Pseudo, DL, Node->getVTList(), Operands);
1989       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1990         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1991 
1992       ReplaceNode(Node, Load);
1993       return;
1994     }
1995     }
1996     break;
1997   }
1998   case ISD::INTRINSIC_VOID: {
1999     unsigned IntNo = Node->getConstantOperandVal(1);
2000     switch (IntNo) {
2001     case Intrinsic::riscv_vsseg2:
2002     case Intrinsic::riscv_vsseg3:
2003     case Intrinsic::riscv_vsseg4:
2004     case Intrinsic::riscv_vsseg5:
2005     case Intrinsic::riscv_vsseg6:
2006     case Intrinsic::riscv_vsseg7:
2007     case Intrinsic::riscv_vsseg8: {
2008       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2009       return;
2010     }
2011     case Intrinsic::riscv_vsseg2_mask:
2012     case Intrinsic::riscv_vsseg3_mask:
2013     case Intrinsic::riscv_vsseg4_mask:
2014     case Intrinsic::riscv_vsseg5_mask:
2015     case Intrinsic::riscv_vsseg6_mask:
2016     case Intrinsic::riscv_vsseg7_mask:
2017     case Intrinsic::riscv_vsseg8_mask: {
2018       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2019       return;
2020     }
2021     case Intrinsic::riscv_vssseg2:
2022     case Intrinsic::riscv_vssseg3:
2023     case Intrinsic::riscv_vssseg4:
2024     case Intrinsic::riscv_vssseg5:
2025     case Intrinsic::riscv_vssseg6:
2026     case Intrinsic::riscv_vssseg7:
2027     case Intrinsic::riscv_vssseg8: {
2028       selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2029       return;
2030     }
2031     case Intrinsic::riscv_vssseg2_mask:
2032     case Intrinsic::riscv_vssseg3_mask:
2033     case Intrinsic::riscv_vssseg4_mask:
2034     case Intrinsic::riscv_vssseg5_mask:
2035     case Intrinsic::riscv_vssseg6_mask:
2036     case Intrinsic::riscv_vssseg7_mask:
2037     case Intrinsic::riscv_vssseg8_mask: {
2038       selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2039       return;
2040     }
2041     case Intrinsic::riscv_vsoxseg2:
2042     case Intrinsic::riscv_vsoxseg3:
2043     case Intrinsic::riscv_vsoxseg4:
2044     case Intrinsic::riscv_vsoxseg5:
2045     case Intrinsic::riscv_vsoxseg6:
2046     case Intrinsic::riscv_vsoxseg7:
2047     case Intrinsic::riscv_vsoxseg8:
2048       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2049       return;
2050     case Intrinsic::riscv_vsuxseg2:
2051     case Intrinsic::riscv_vsuxseg3:
2052     case Intrinsic::riscv_vsuxseg4:
2053     case Intrinsic::riscv_vsuxseg5:
2054     case Intrinsic::riscv_vsuxseg6:
2055     case Intrinsic::riscv_vsuxseg7:
2056     case Intrinsic::riscv_vsuxseg8:
2057       selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2058       return;
2059     case Intrinsic::riscv_vsoxseg2_mask:
2060     case Intrinsic::riscv_vsoxseg3_mask:
2061     case Intrinsic::riscv_vsoxseg4_mask:
2062     case Intrinsic::riscv_vsoxseg5_mask:
2063     case Intrinsic::riscv_vsoxseg6_mask:
2064     case Intrinsic::riscv_vsoxseg7_mask:
2065     case Intrinsic::riscv_vsoxseg8_mask:
2066       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2067       return;
2068     case Intrinsic::riscv_vsuxseg2_mask:
2069     case Intrinsic::riscv_vsuxseg3_mask:
2070     case Intrinsic::riscv_vsuxseg4_mask:
2071     case Intrinsic::riscv_vsuxseg5_mask:
2072     case Intrinsic::riscv_vsuxseg6_mask:
2073     case Intrinsic::riscv_vsuxseg7_mask:
2074     case Intrinsic::riscv_vsuxseg8_mask:
2075       selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2076       return;
2077     case Intrinsic::riscv_vsoxei:
2078     case Intrinsic::riscv_vsoxei_mask:
2079     case Intrinsic::riscv_vsuxei:
2080     case Intrinsic::riscv_vsuxei_mask: {
2081       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2082                       IntNo == Intrinsic::riscv_vsuxei_mask;
2083       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2084                        IntNo == Intrinsic::riscv_vsoxei_mask;
2085 
2086       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2087       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2088 
2089       unsigned CurOp = 2;
2090       SmallVector<SDValue, 8> Operands;
2091       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2092 
2093       MVT IndexVT;
2094       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2095                                  /*IsStridedOrIndexed*/ true, Operands,
2096                                  /*IsLoad=*/false, &IndexVT);
2097 
2098       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2099              "Element count mismatch");
2100 
2101       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2102       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2103       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2104       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2105         report_fatal_error("The V extension does not support EEW=64 for index "
2106                            "values when XLEN=32");
2107       }
2108       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2109           IsMasked, IsOrdered, IndexLog2EEW,
2110           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2111       MachineSDNode *Store =
2112           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2113 
2114       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2115         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2116 
2117       ReplaceNode(Node, Store);
2118       return;
2119     }
2120     case Intrinsic::riscv_vsm:
2121     case Intrinsic::riscv_vse:
2122     case Intrinsic::riscv_vse_mask:
2123     case Intrinsic::riscv_vsse:
2124     case Intrinsic::riscv_vsse_mask: {
2125       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2126                       IntNo == Intrinsic::riscv_vsse_mask;
2127       bool IsStrided =
2128           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2129 
2130       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132 
2133       unsigned CurOp = 2;
2134       SmallVector<SDValue, 8> Operands;
2135       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136 
2137       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2138                                  Operands);
2139 
2140       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2141       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2142           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2143       MachineSDNode *Store =
2144           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2145       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2146         CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2147 
2148       ReplaceNode(Node, Store);
2149       return;
2150     }
2151     case Intrinsic::riscv_sf_vc_x_se:
2152     case Intrinsic::riscv_sf_vc_i_se:
2153       selectSF_VC_X_SE(Node);
2154       return;
2155     }
2156     break;
2157   }
2158   case ISD::BITCAST: {
2159     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2160     // Just drop bitcasts between vectors if both are fixed or both are
2161     // scalable.
2162     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2163         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2164       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2165       CurDAG->RemoveDeadNode(Node);
2166       return;
2167     }
2168     break;
2169   }
2170   case ISD::INSERT_SUBVECTOR: {
2171     SDValue V = Node->getOperand(0);
2172     SDValue SubV = Node->getOperand(1);
2173     SDLoc DL(SubV);
2174     auto Idx = Node->getConstantOperandVal(2);
2175     MVT SubVecVT = SubV.getSimpleValueType();
2176 
2177     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2178     MVT SubVecContainerVT = SubVecVT;
2179     // Establish the correct scalable-vector types for any fixed-length type.
2180     if (SubVecVT.isFixedLengthVector()) {
2181       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2182       TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2183       [[maybe_unused]] bool ExactlyVecRegSized =
2184           Subtarget->expandVScale(SubVecVT.getSizeInBits())
2185               .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2186       assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2187                                .getKnownMinValue()));
2188       assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2189     }
2190     MVT ContainerVT = VT;
2191     if (VT.isFixedLengthVector())
2192       ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2193 
2194     const auto *TRI = Subtarget->getRegisterInfo();
2195     unsigned SubRegIdx;
2196     std::tie(SubRegIdx, Idx) =
2197         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2198             ContainerVT, SubVecContainerVT, Idx, TRI);
2199 
2200     // If the Idx hasn't been completely eliminated then this is a subvector
2201     // insert which doesn't naturally align to a vector register. These must
2202     // be handled using instructions to manipulate the vector registers.
2203     if (Idx != 0)
2204       break;
2205 
2206     RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2207     [[maybe_unused]] bool IsSubVecPartReg =
2208         SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2209         SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2210         SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2211     assert((!IsSubVecPartReg || V.isUndef()) &&
2212            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2213            "the subvector is smaller than a full-sized register");
2214 
2215     // If we haven't set a SubRegIdx, then we must be going between
2216     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2217     if (SubRegIdx == RISCV::NoSubRegister) {
2218       unsigned InRegClassID =
2219           RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2220       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2221                  InRegClassID &&
2222              "Unexpected subvector extraction");
2223       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2224       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2225                                                DL, VT, SubV, RC);
2226       ReplaceNode(Node, NewNode);
2227       return;
2228     }
2229 
2230     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2231     ReplaceNode(Node, Insert.getNode());
2232     return;
2233   }
2234   case ISD::EXTRACT_SUBVECTOR: {
2235     SDValue V = Node->getOperand(0);
2236     auto Idx = Node->getConstantOperandVal(1);
2237     MVT InVT = V.getSimpleValueType();
2238     SDLoc DL(V);
2239 
2240     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2241     MVT SubVecContainerVT = VT;
2242     // Establish the correct scalable-vector types for any fixed-length type.
2243     if (VT.isFixedLengthVector()) {
2244       assert(Idx == 0);
2245       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2246     }
2247     if (InVT.isFixedLengthVector())
2248       InVT = TLI.getContainerForFixedLengthVector(InVT);
2249 
2250     const auto *TRI = Subtarget->getRegisterInfo();
2251     unsigned SubRegIdx;
2252     std::tie(SubRegIdx, Idx) =
2253         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2254             InVT, SubVecContainerVT, Idx, TRI);
2255 
2256     // If the Idx hasn't been completely eliminated then this is a subvector
2257     // extract which doesn't naturally align to a vector register. These must
2258     // be handled using instructions to manipulate the vector registers.
2259     if (Idx != 0)
2260       break;
2261 
2262     // If we haven't set a SubRegIdx, then we must be going between
2263     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2264     if (SubRegIdx == RISCV::NoSubRegister) {
2265       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2266       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2267                  InRegClassID &&
2268              "Unexpected subvector extraction");
2269       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2270       SDNode *NewNode =
2271           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2272       ReplaceNode(Node, NewNode);
2273       return;
2274     }
2275 
2276     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2277     ReplaceNode(Node, Extract.getNode());
2278     return;
2279   }
2280   case RISCVISD::VMV_S_X_VL:
2281   case RISCVISD::VFMV_S_F_VL:
2282   case RISCVISD::VMV_V_X_VL:
2283   case RISCVISD::VFMV_V_F_VL: {
2284     // Try to match splat of a scalar load to a strided load with stride of x0.
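         // For example, splatting a loaded i32 can become a zero-stride
         // vlse32.v (stride register x0), or a plain vle32.v when VL is 1, so
         // the value never has to pass through a scalar register.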
2285     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2286                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2287     if (!Node->getOperand(0).isUndef())
2288       break;
2289     SDValue Src = Node->getOperand(1);
2290     auto *Ld = dyn_cast<LoadSDNode>(Src);
2291     // Can't fold an indexed (load-update) node because its second output,
2292     // the updated address, is also used, so the node can't be removed.
2293     if (!Ld || Ld->isIndexed())
2294       break;
2295     EVT MemVT = Ld->getMemoryVT();
2296     // The memory VT should be the same size as the element type.
2297     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2298       break;
2299     if (!IsProfitableToFold(Src, Node, Node) ||
2300         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2301       break;
2302 
2303     SDValue VL;
2304     if (IsScalarMove) {
2305       // We could deal with more VL if we update the VSETVLI insert pass to
2306       // avoid introducing more VSETVLI.
2307       if (!isOneConstant(Node->getOperand(2)))
2308         break;
2309       selectVLOp(Node->getOperand(2), VL);
2310     } else
2311       selectVLOp(Node->getOperand(2), VL);
2312 
2313     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2314     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2315 
2316     // If VL=1, then we don't need to do a strided load and can just do a
2317     // regular load.
2318     bool IsStrided = !isOneConstant(VL);
2319 
2320     // Only do a strided load if the subtarget has an optimized zero-stride vector load.
2321     if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2322       break;
2323 
2324     SmallVector<SDValue> Operands = {
2325         SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2326         Ld->getBasePtr()};
2327     if (IsStrided)
2328       Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2329     uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2330     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2331     Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2332 
2333     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2334     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2335         /*IsMasked*/ false, IsStrided, /*FF*/ false,
2336         Log2SEW, static_cast<unsigned>(LMUL));
2337     MachineSDNode *Load =
2338         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2339     // Update the chain.
2340     ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2341     // Record the mem-refs
2342     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2343     // Replace the splat with the vlse.
2344     ReplaceNode(Node, Load);
2345     return;
2346   }
2347   case ISD::PREFETCH:
2348     unsigned Locality = Node->getConstantOperandVal(3);
2349     if (Locality > 2)
2350       break;
2351 
2352     if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2353       MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2354       MMO->setFlags(MachineMemOperand::MONonTemporal);
2355 
2356       int NontemporalLevel = 0;
2357       switch (Locality) {
2358       case 0:
2359         NontemporalLevel = 3; // NTL.ALL
2360         break;
2361       case 1:
2362         NontemporalLevel = 1; // NTL.PALL
2363         break;
2364       case 2:
2365         NontemporalLevel = 0; // NTL.P1
2366         break;
2367       default:
2368         llvm_unreachable("unexpected locality value.");
2369       }
2370 
2371       if (NontemporalLevel & 0b1)
2372         MMO->setFlags(MONontemporalBit0);
2373       if (NontemporalLevel & 0b10)
2374         MMO->setFlags(MONontemporalBit1);
2375     }
2376     break;
2377   }
2378 
2379   // Select the default instruction.
2380   SelectCode(Node);
2381 }
2382 
2383 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2384     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2385     std::vector<SDValue> &OutOps) {
2386   // Always produce a register and immediate operand, as expected by
2387   // RISCVAsmPrinter::PrintAsmMemoryOperand.
2388   switch (ConstraintID) {
2389   case InlineAsm::ConstraintCode::o:
2390   case InlineAsm::ConstraintCode::m: {
2391     SDValue Op0, Op1;
2392     [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2393     assert(Found && "SelectAddrRegImm should always succeed");
2394     OutOps.push_back(Op0);
2395     OutOps.push_back(Op1);
2396     return false;
2397   }
2398   case InlineAsm::ConstraintCode::A:
2399     OutOps.push_back(Op);
2400     OutOps.push_back(
2401         CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2402     return false;
2403   default:
2404     report_fatal_error("Unexpected asm memory constraint " +
2405                        InlineAsm::getMemConstraintName(ConstraintID));
2406   }
2407 
2408   return true;
2409 }
2410 
2411 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2412                                              SDValue &Offset) {
2413   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2414     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2415     Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2416     return true;
2417   }
2418 
2419   return false;
2420 }
2421 
2422 // Select a frame index and an optional immediate offset from an ADD or OR.
2423 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2424                                               SDValue &Offset) {
2425   if (SelectAddrFrameIndex(Addr, Base, Offset))
2426     return true;
2427 
2428   if (!CurDAG->isBaseWithConstantOffset(Addr))
2429     return false;
2430 
2431   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2432     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2433     if (isInt<12>(CVal)) {
2434       Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2435                                          Subtarget->getXLenVT());
2436       Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2437                                          Subtarget->getXLenVT());
2438       return true;
2439     }
2440   }
2441 
2442   return false;
2443 }
2444 
2445 // Fold constant addresses.
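     // For example, an access to absolute address 0x12345678 can be selected
     // as LUI 0x12345 with a 0x678 offset folded into the memory instruction.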
2446 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2447                                const MVT VT, const RISCVSubtarget *Subtarget,
2448                                SDValue Addr, SDValue &Base, SDValue &Offset,
2449                                bool IsPrefetch = false) {
2450   if (!isa<ConstantSDNode>(Addr))
2451     return false;
2452 
2453   int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2454 
2455   // If the constant is a simm12, we can fold the whole constant and use X0 as
2456   // the base. If the constant can be materialized with LUI+simm12, use LUI as
2457   // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
2458   int64_t Lo12 = SignExtend64<12>(CVal);
2459   int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2460   if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2461     if (IsPrefetch && (Lo12 & 0b11111) != 0)
2462       return false;
2463 
2464     if (Hi) {
2465       int64_t Hi20 = (Hi >> 12) & 0xfffff;
2466       Base = SDValue(
2467           CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2468                                  CurDAG->getTargetConstant(Hi20, DL, VT)),
2469           0);
2470     } else {
2471       Base = CurDAG->getRegister(RISCV::X0, VT);
2472     }
2473     Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2474     return true;
2475   }
2476 
2477   // Ask how constant materialization would handle this constant.
2478   RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2479 
2480   // If the last instruction would be an ADDI, we can fold its immediate and
2481   // emit the rest of the sequence as the base.
2482   if (Seq.back().getOpcode() != RISCV::ADDI)
2483     return false;
2484   Lo12 = Seq.back().getImm();
2485   if (IsPrefetch && (Lo12 & 0b11111) != 0)
2486     return false;
2487 
2488   // Drop the last instruction.
2489   Seq.pop_back();
2490   assert(!Seq.empty() && "Expected more instructions in sequence");
2491 
2492   Base = selectImmSeq(CurDAG, DL, VT, Seq);
2493   Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2494   return true;
2495 }
2496 
2497 // Is this ADD instruction only used as the base pointer of scalar loads and
2498 // stores?
2499 static bool isWorthFoldingAdd(SDValue Add) {
2500   for (auto *Use : Add->uses()) {
2501     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2502         Use->getOpcode() != ISD::ATOMIC_LOAD &&
2503         Use->getOpcode() != ISD::ATOMIC_STORE)
2504       return false;
2505     EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2506     if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2507         VT != MVT::f64)
2508       return false;
2509     // Don't allow stores of the value. It must be used as the address.
2510     if (Use->getOpcode() == ISD::STORE &&
2511         cast<StoreSDNode>(Use)->getValue() == Add)
2512       return false;
2513     if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2514         cast<AtomicSDNode>(Use)->getVal() == Add)
2515       return false;
2516   }
2517 
2518   return true;
2519 }
2520 
2521 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2522                                               unsigned MaxShiftAmount,
2523                                               SDValue &Base, SDValue &Index,
2524                                               SDValue &Scale) {
2525   EVT VT = Addr.getSimpleValueType();
2526   auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2527                                               SDValue &Shift) {
2528     uint64_t ShiftAmt = 0;
2529     Index = N;
2530 
2531     if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2532       // Only match shifts by a value in range [0, MaxShiftAmount].
2533       if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2534         Index = N.getOperand(0);
2535         ShiftAmt = N.getConstantOperandVal(1);
2536       }
2537     }
2538 
2539     Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2540     return ShiftAmt != 0;
2541   };
2542 
2543   if (Addr.getOpcode() == ISD::ADD) {
2544     if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2545       SDValue AddrB = Addr.getOperand(0);
2546       if (AddrB.getOpcode() == ISD::ADD &&
2547           UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2548           !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2549           isInt<12>(C1->getSExtValue())) {
2550         // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2551         SDValue C1Val =
2552             CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2553         Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2554                                               AddrB.getOperand(1), C1Val),
2555                        0);
2556         return true;
2557       }
2558     } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2559       Base = Addr.getOperand(1);
2560       return true;
2561     } else {
2562       UnwrapShl(Addr.getOperand(1), Index, Scale);
2563       Base = Addr.getOperand(0);
2564       return true;
2565     }
2566   } else if (UnwrapShl(Addr, Index, Scale)) {
2567     EVT VT = Addr.getValueType();
2568     Base = CurDAG->getRegister(RISCV::X0, VT);
2569     return true;
2570   }
2571 
2572   return false;
2573 }
2574 
2575 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2576                                          SDValue &Offset, bool IsINX) {
2577   if (SelectAddrFrameIndex(Addr, Base, Offset))
2578     return true;
2579 
2580   SDLoc DL(Addr);
2581   MVT VT = Addr.getSimpleValueType();
2582 
2583   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2584     Base = Addr.getOperand(0);
2585     Offset = Addr.getOperand(1);
2586     return true;
2587   }
2588 
2589   int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2590   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2591     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2592     if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2593       Base = Addr.getOperand(0);
2594       if (Base.getOpcode() == RISCVISD::ADD_LO) {
2595         SDValue LoOperand = Base.getOperand(1);
2596         if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2597           // If the Lo in (ADD_LO hi, lo) is a global variable's address
2598           // (its low part, really), then we can rely on the alignment of that
2599           // variable to provide a margin of safety before the low part can overflow
2600           // the 12 bits of the load/store offset. Check if CVal falls within
2601           // that margin; if so (low part + CVal) can't overflow.
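               // For example, if the global is 16-byte aligned and has no
               // offset, its lo part is a multiple of 16, so adding any CVal
               // in [1, 15] cannot carry out of the 12-bit offset field.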
2602           const DataLayout &DL = CurDAG->getDataLayout();
2603           Align Alignment = commonAlignment(
2604               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2605           if (CVal == 0 || Alignment > CVal) {
2606             int64_t CombinedOffset = CVal + GA->getOffset();
2607             Base = Base.getOperand(0);
2608             Offset = CurDAG->getTargetGlobalAddress(
2609                 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2610                 CombinedOffset, GA->getTargetFlags());
2611             return true;
2612           }
2613         }
2614       }
2615 
2616       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2617         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2618       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2619       return true;
2620     }
2621   }
2622 
2623   // Handle ADD with large immediates.
2624   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2625     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2626     assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2627            "simm12 not already handled?");
2628 
2629     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2630     // an ADDI for part of the offset and fold the rest into the load/store.
2631     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
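         // For example, an offset of 3000 becomes (ADDI base, 2047) with the
         // remaining 953 folded into the load/store immediate.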
2632     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2633       int64_t Adj = CVal < 0 ? -2048 : 2047;
2634       Base = SDValue(
2635           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2636                                  CurDAG->getTargetConstant(Adj, DL, VT)),
2637           0);
2638       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2639       return true;
2640     }
2641 
2642     // For larger immediates, we might be able to save one instruction from
2643     // constant materialization by folding the Lo12 bits of the immediate into
2644     // the address. We should only do this if the ADD is only used by loads and
2645     // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2646     // separately with the full materialized immediate creating extra
2647     // instructions.
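    // Illustrative example (register names are arbitrary): for an offset of
    // 0x10004 this produces
    //   lui  t0, 16             ; hi bits of the offset
    //   add  t0, base, t0
    //   lw   a0, 4(t0)          ; lo12 folded into the load
    // instead of materializing the full constant and then adding it.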
2648     if (isWorthFoldingAdd(Addr) &&
2649         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2650                            Offset)) {
2651       // Insert an ADD instruction with the materialized Hi52 bits.
2652       Base = SDValue(
2653           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2654           0);
2655       return true;
2656     }
2657   }
2658 
2659   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2660     return true;
2661 
2662   Base = Addr;
2663   Offset = CurDAG->getTargetConstant(0, DL, VT);
2664   return true;
2665 }
2666 
2667 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2668 /// Offset should be all zeros.
2669 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2670                                                  SDValue &Offset) {
2671   if (SelectAddrFrameIndex(Addr, Base, Offset))
2672     return true;
2673 
2674   SDLoc DL(Addr);
2675   MVT VT = Addr.getSimpleValueType();
2676 
2677   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2678     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2679     if (isInt<12>(CVal)) {
2680       Base = Addr.getOperand(0);
2681 
2682       // Early-out if not a valid offset.
2683       if ((CVal & 0b11111) != 0) {
2684         Base = Addr;
2685         Offset = CurDAG->getTargetConstant(0, DL, VT);
2686         return true;
2687       }
2688 
2689       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2690         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2691       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2692       return true;
2693     }
2694   }
2695 
2696   // Handle ADD with large immediates.
2697   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2698     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2699     assert(!isInt<12>(CVal) &&
2700            "simm12 not already handled?");
2701 
2702     // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2703     // one instruction by folding adjustment (-2048 or 2016) into the address.
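    // Illustrative example (register names are arbitrary): an offset of 3000
    // becomes
    //   addi t0, base, 984
    // with the remaining offset 2016 (whose low five bits are clear) used as
    // the immediate of the memory access.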
2704     if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2705       int64_t Adj = CVal < 0 ? -2048 : 2016;
2706       int64_t AdjustedOffset = CVal - Adj;
2707       Base = SDValue(CurDAG->getMachineNode(
2708                          RISCV::ADDI, DL, VT, Addr.getOperand(0),
2709                          CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2710                      0);
2711       Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2712       return true;
2713     }
2714 
2715     if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2716                            Offset, true)) {
2717       // Insert an ADD instruction with the materialized Hi52 bits.
2718       Base = SDValue(
2719           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2720           0);
2721       return true;
2722     }
2723   }
2724 
2725   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2726     return true;
2727 
2728   Base = Addr;
2729   Offset = CurDAG->getTargetConstant(0, DL, VT);
2730   return true;
2731 }
2732 
2733 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2734                                          SDValue &Offset) {
2735   if (Addr.getOpcode() != ISD::ADD)
2736     return false;
2737 
2738   if (isa<ConstantSDNode>(Addr.getOperand(1)))
2739     return false;
2740 
2741   Base = Addr.getOperand(1);
2742   Offset = Addr.getOperand(0);
2743   return true;
2744 }
2745 
2746 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2747                                         SDValue &ShAmt) {
2748   ShAmt = N;
2749 
2750   // Peek through zext.
2751   if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2752     ShAmt = ShAmt.getOperand(0);
2753 
2754   // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2755   // amount. If there is an AND on the shift amount, we can bypass it if it
2756   // doesn't affect any of those bits.
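  // Illustrative example: on RV64 a 64-bit shift only reads shamt[5:0], so
  // for (sll X, (and Y, 63)) the AND is redundant and we can use Y as the
  // shift amount directly.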
2757   if (ShAmt.getOpcode() == ISD::AND &&
2758       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2759     const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2760 
2761     // Since the max shift amount is a power of 2 we can subtract 1 to make a
2762     // mask that covers the bits needed to represent all shift amounts.
2763     assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2764     APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2765 
2766     if (ShMask.isSubsetOf(AndMask)) {
2767       ShAmt = ShAmt.getOperand(0);
2768     } else {
2769       // SimplifyDemandedBits may have optimized the mask so try restoring any
2770       // bits that are known zero.
2771       KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2772       if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2773         return true;
2774       ShAmt = ShAmt.getOperand(0);
2775     }
2776   }
2777 
2778   if (ShAmt.getOpcode() == ISD::ADD &&
2779       isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2780     uint64_t Imm = ShAmt.getConstantOperandVal(1);
2781     // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2782     // to avoid the ADD.
2783     if (Imm != 0 && Imm % ShiftWidth == 0) {
2784       ShAmt = ShAmt.getOperand(0);
2785       return true;
2786     }
2787   } else if (ShAmt.getOpcode() == ISD::SUB &&
2788              isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2789     uint64_t Imm = ShAmt.getConstantOperandVal(0);
2790     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2791     // generate a NEG instead of a SUB of a constant.
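    // Illustrative example (register names are arbitrary): a 32-bit shift by
    // (32 - X) is equivalent to a shift by -X, since 32 == 0 (mod 32):
    //   neg t0, X
    //   sll rd, rs1, t0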
2792     if (Imm != 0 && Imm % ShiftWidth == 0) {
2793       SDLoc DL(ShAmt);
2794       EVT VT = ShAmt.getValueType();
2795       SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2796       unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2797       MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2798                                                   ShAmt.getOperand(1));
2799       ShAmt = SDValue(Neg, 0);
2800       return true;
2801     }
2802     // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2803     // to generate a NOT instead of a SUB of a constant.
2804     if (Imm % ShiftWidth == ShiftWidth - 1) {
2805       SDLoc DL(ShAmt);
2806       EVT VT = ShAmt.getValueType();
2807       MachineSDNode *Not =
2808           CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2809                                  CurDAG->getTargetConstant(-1, DL, VT));
2810       ShAmt = SDValue(Not, 0);
2811       return true;
2812     }
2813   }
2814 
2815   return true;
2816 }
2817 
2818 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2819 /// check for equality with 0. This function emits instructions that convert the
2820 /// seteq/setne into something that can be compared with 0.
2821 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2822 /// ISD::SETNE).
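/// Illustrative example (register names are arbitrary): (setne X, 5) can be
/// rewritten as
///   addi t0, X, -5
/// after which the caller only needs to compare t0 against zero (e.g. snez).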
2823 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2824                                     SDValue &Val) {
2825   assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2826          "Unexpected condition code!");
2827 
2828   // We're looking for a setcc.
2829   if (N->getOpcode() != ISD::SETCC)
2830     return false;
2831 
2832   // Must be an equality comparison.
2833   ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2834   if (CCVal != ExpectedCCVal)
2835     return false;
2836 
2837   SDValue LHS = N->getOperand(0);
2838   SDValue RHS = N->getOperand(1);
2839 
2840   if (!LHS.getValueType().isScalarInteger())
2841     return false;
2842 
2843   // If the RHS is 0, we don't need any extra instructions; just return the LHS.
2844   if (isNullConstant(RHS)) {
2845     Val = LHS;
2846     return true;
2847   }
2848 
2849   SDLoc DL(N);
2850 
2851   if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2852     int64_t CVal = C->getSExtValue();
2853     // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2854     // non-zero otherwise.
2855     if (CVal == -2048) {
2856       Val =
2857           SDValue(CurDAG->getMachineNode(
2858                       RISCV::XORI, DL, N->getValueType(0), LHS,
2859                       CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2860                   0);
2861       return true;
2862     }
2863     // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2864     // LHS is equal to the RHS and non-zero otherwise.
2865     if (isInt<12>(CVal) || CVal == 2048) {
2866       Val =
2867           SDValue(CurDAG->getMachineNode(
2868                       RISCV::ADDI, DL, N->getValueType(0), LHS,
2869                       CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2870                   0);
2871       return true;
2872     }
2873   }
2874 
2875   // If nothing else we can XOR the LHS and RHS to produce zero if they are
2876   // equal and a non-zero value if they aren't.
2877   Val = SDValue(
2878       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2879   return true;
2880 }
2881 
2882 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2883   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2884       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2885     Val = N.getOperand(0);
2886     return true;
2887   }
2888 
2889   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2890     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2891       return N;
2892 
2893     SDValue N0 = N.getOperand(0);
2894     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2895         N.getConstantOperandVal(1) == ShiftAmt &&
2896         N0.getConstantOperandVal(1) == ShiftAmt)
2897       return N0.getOperand(0);
2898 
2899     return N;
2900   };
2901 
2902   MVT VT = N.getSimpleValueType();
2903   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2904     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2905     return true;
2906   }
2907 
2908   return false;
2909 }
2910 
2911 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2912   if (N.getOpcode() == ISD::AND) {
2913     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2914     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2915       Val = N.getOperand(0);
2916       return true;
2917     }
2918   }
2919   MVT VT = N.getSimpleValueType();
2920   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2921   if (CurDAG->MaskedValueIsZero(N, Mask)) {
2922     Val = N;
2923     return true;
2924   }
2925 
2926   return false;
2927 }
2928 
2929 /// Look for various patterns that can be done with a SHL that can be folded
2930 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2931 /// SHXADD we are trying to match.
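/// Illustrative example (register names are arbitrary), with ShAmt == 2:
///   (add Z, (and (shl Y, 1), ~3))
/// can be selected as
///   srli   t0, Y, 1
///   sh2add rd, t0, Z
/// because ((Y >> 1) << 2) equals ((Y << 1) & ~3).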
2932 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2933                                        SDValue &Val) {
2934   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2935     SDValue N0 = N.getOperand(0);
2936 
2937     bool LeftShift = N0.getOpcode() == ISD::SHL;
2938     if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2939         isa<ConstantSDNode>(N0.getOperand(1))) {
2940       uint64_t Mask = N.getConstantOperandVal(1);
2941       unsigned C2 = N0.getConstantOperandVal(1);
2942 
2943       unsigned XLen = Subtarget->getXLen();
2944       if (LeftShift)
2945         Mask &= maskTrailingZeros<uint64_t>(C2);
2946       else
2947         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2948 
2949       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2950       // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2951       // followed by a SHXADD with c3 for the X amount.
2952       if (isShiftedMask_64(Mask)) {
2953         unsigned Leading = XLen - llvm::bit_width(Mask);
2954         unsigned Trailing = llvm::countr_zero(Mask);
2955         if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2956           SDLoc DL(N);
2957           EVT VT = N.getValueType();
2958           Val = SDValue(CurDAG->getMachineNode(
2959                             RISCV::SRLI, DL, VT, N0.getOperand(0),
2960                             CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2961                         0);
2962           return true;
2963         }
2964         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2965         // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2966         // followed by a SHXADD using c3 for the X amount.
2967         if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2968           SDLoc DL(N);
2969           EVT VT = N.getValueType();
2970           Val = SDValue(
2971               CurDAG->getMachineNode(
2972                   RISCV::SRLI, DL, VT, N0.getOperand(0),
2973                   CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2974               0);
2975           return true;
2976         }
2977       }
2978     }
2979   }
2980 
2981   bool LeftShift = N.getOpcode() == ISD::SHL;
2982   if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2983       isa<ConstantSDNode>(N.getOperand(1))) {
2984     SDValue N0 = N.getOperand(0);
2985     if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2986         isa<ConstantSDNode>(N0.getOperand(1))) {
2987       uint64_t Mask = N0.getConstantOperandVal(1);
2988       if (isShiftedMask_64(Mask)) {
2989         unsigned C1 = N.getConstantOperandVal(1);
2990         unsigned XLen = Subtarget->getXLen();
2991         unsigned Leading = XLen - llvm::bit_width(Mask);
2992         unsigned Trailing = llvm::countr_zero(Mask);
2993         // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2994         // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2995         if (LeftShift && Leading == 32 && Trailing > 0 &&
2996             (Trailing + C1) == ShAmt) {
2997           SDLoc DL(N);
2998           EVT VT = N.getValueType();
2999           Val = SDValue(CurDAG->getMachineNode(
3000                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
3001                             CurDAG->getTargetConstant(Trailing, DL, VT)),
3002                         0);
3003           return true;
3004         }
3005         // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3006         // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3007         if (!LeftShift && Leading == 32 && Trailing > C1 &&
3008             (Trailing - C1) == ShAmt) {
3009           SDLoc DL(N);
3010           EVT VT = N.getValueType();
3011           Val = SDValue(CurDAG->getMachineNode(
3012                             RISCV::SRLIW, DL, VT, N0.getOperand(0),
3013                             CurDAG->getTargetConstant(Trailing, DL, VT)),
3014                         0);
3015           return true;
3016         }
3017       }
3018     }
3019   }
3020 
3021   return false;
3022 }
3023 
3024 /// Look for various patterns that can be done with a SHL that can be folded
3025 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3026 /// SHXADD_UW we are trying to match.
3027 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3028                                           SDValue &Val) {
3029   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3030       N.hasOneUse()) {
3031     SDValue N0 = N.getOperand(0);
3032     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3033         N0.hasOneUse()) {
3034       uint64_t Mask = N.getConstantOperandVal(1);
3035       unsigned C2 = N0.getConstantOperandVal(1);
3036 
3037       Mask &= maskTrailingZeros<uint64_t>(C2);
3038 
3039       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3040       // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3041       // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
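      // Illustrative example (register names are arbitrary), with ShAmt == 2,
      // c2 == 3 and c1 == 0x3FFFFFFF8:
      //   (add Z, (and (shl Y, 3), 0x3FFFFFFF8))
      // can be selected as
      //   slli      t0, Y, 1
      //   sh2add.uw rd, t0, Z
      // since zero-extending (Y << 1) from 32 bits and shifting it left by 2
      // reproduces exactly bits [33:3] of (Y << 3).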
3042       if (isShiftedMask_64(Mask)) {
3043         unsigned Leading = llvm::countl_zero(Mask);
3044         unsigned Trailing = llvm::countr_zero(Mask);
3045         if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3046           SDLoc DL(N);
3047           EVT VT = N.getValueType();
3048           Val = SDValue(CurDAG->getMachineNode(
3049                             RISCV::SLLI, DL, VT, N0.getOperand(0),
3050                             CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3051                         0);
3052           return true;
3053         }
3054       }
3055     }
3056   }
3057 
3058   return false;
3059 }
3060 
3061 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3062                                         unsigned Bits,
3063                                         const TargetInstrInfo *TII) {
3064   unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3065 
3066   if (!MCOpcode)
3067     return false;
3068 
3069   const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3070   const uint64_t TSFlags = MCID.TSFlags;
3071   if (!RISCVII::hasSEWOp(TSFlags))
3072     return false;
3073   assert(RISCVII::hasVLOp(TSFlags));
3074 
3075   bool HasGlueOp = User->getGluedNode() != nullptr;
3076   unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3077   bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3078   bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3079   unsigned VLIdx =
3080       User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3081   const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3082 
3083   if (UserOpNo == VLIdx)
3084     return false;
3085 
3086   auto NumDemandedBits =
3087       RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3088   return NumDemandedBits && Bits >= *NumDemandedBits;
3089 }
3090 
3091 // Return true if all users of this SDNode* only consume the lower \p Bits.
3092 // This can be used to form W instructions for add/sub/mul/shl even when the
3093 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3094 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
3095 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3096 // the add/sub/mul/shl to become non-W instructions. By checking the users we
3097 // may be able to use a W instruction and CSE with the other instruction if
3098 // this has happened. We could try to detect that the CSE opportunity exists
3099 // before doing this, but that would be more complicated.
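// Illustrative example: if an i64 (add X, Y) is only consumed by SW stores
// and by ADDW instructions, then every user reads at most the low 32 bits,
// so the add itself can also be selected as ADDW.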
3100 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3101                                         const unsigned Depth) const {
3102   assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3103           Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3104           Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3105           Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3106           Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3107           isa<ConstantSDNode>(Node) || Depth != 0) &&
3108          "Unexpected opcode");
3109 
3110   if (Depth >= SelectionDAG::MaxRecursionDepth)
3111     return false;
3112 
3113   // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3114   // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3115   if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3116     return false;
3117 
3118   for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3119     SDNode *User = *UI;
3120     // Users of this node should have already been instruction selected
3121     if (!User->isMachineOpcode())
3122       return false;
3123 
3124     // TODO: Add more opcodes?
3125     switch (User->getMachineOpcode()) {
3126     default:
3127       if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3128         break;
3129       return false;
3130     case RISCV::ADDW:
3131     case RISCV::ADDIW:
3132     case RISCV::SUBW:
3133     case RISCV::MULW:
3134     case RISCV::SLLW:
3135     case RISCV::SLLIW:
3136     case RISCV::SRAW:
3137     case RISCV::SRAIW:
3138     case RISCV::SRLW:
3139     case RISCV::SRLIW:
3140     case RISCV::DIVW:
3141     case RISCV::DIVUW:
3142     case RISCV::REMW:
3143     case RISCV::REMUW:
3144     case RISCV::ROLW:
3145     case RISCV::RORW:
3146     case RISCV::RORIW:
3147     case RISCV::CLZW:
3148     case RISCV::CTZW:
3149     case RISCV::CPOPW:
3150     case RISCV::SLLI_UW:
3151     case RISCV::FMV_W_X:
3152     case RISCV::FCVT_H_W:
3153     case RISCV::FCVT_H_WU:
3154     case RISCV::FCVT_S_W:
3155     case RISCV::FCVT_S_WU:
3156     case RISCV::FCVT_D_W:
3157     case RISCV::FCVT_D_WU:
3158     case RISCV::TH_REVW:
3159     case RISCV::TH_SRRIW:
3160       if (Bits < 32)
3161         return false;
3162       break;
3163     case RISCV::SLL:
3164     case RISCV::SRA:
3165     case RISCV::SRL:
3166     case RISCV::ROL:
3167     case RISCV::ROR:
3168     case RISCV::BSET:
3169     case RISCV::BCLR:
3170     case RISCV::BINV:
3171       // Shift amount operands only use log2(XLen) bits.
3172       if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3173         return false;
3174       break;
3175     case RISCV::SLLI:
3176       // SLLI only uses the lower (XLen - ShAmt) bits.
3177       if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3178         return false;
3179       break;
3180     case RISCV::ANDI:
3181       if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3182         break;
3183       goto RecCheck;
3184     case RISCV::ORI: {
3185       uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3186       if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3187         break;
3188       [[fallthrough]];
3189     }
3190     case RISCV::AND:
3191     case RISCV::OR:
3192     case RISCV::XOR:
3193     case RISCV::XORI:
3194     case RISCV::ANDN:
3195     case RISCV::ORN:
3196     case RISCV::XNOR:
3197     case RISCV::SH1ADD:
3198     case RISCV::SH2ADD:
3199     case RISCV::SH3ADD:
3200     RecCheck:
3201       if (hasAllNBitUsers(User, Bits, Depth + 1))
3202         break;
3203       return false;
3204     case RISCV::SRLI: {
3205       unsigned ShAmt = User->getConstantOperandVal(1);
3206       // If we are shifting right by less than Bits, and users don't demand any
3207       // bits that were shifted into [Bits-1:0], then we can consider this as an
3208       // N-Bit user.
3209       if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3210         break;
3211       return false;
3212     }
3213     case RISCV::SEXT_B:
3214     case RISCV::PACKH:
3215       if (Bits < 8)
3216         return false;
3217       break;
3218     case RISCV::SEXT_H:
3219     case RISCV::FMV_H_X:
3220     case RISCV::ZEXT_H_RV32:
3221     case RISCV::ZEXT_H_RV64:
3222     case RISCV::PACKW:
3223       if (Bits < 16)
3224         return false;
3225       break;
3226     case RISCV::PACK:
3227       if (Bits < (Subtarget->getXLen() / 2))
3228         return false;
3229       break;
3230     case RISCV::ADD_UW:
3231     case RISCV::SH1ADD_UW:
3232     case RISCV::SH2ADD_UW:
3233     case RISCV::SH3ADD_UW:
3234       // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3235       // 32 bits.
3236       if (UI.getOperandNo() != 0 || Bits < 32)
3237         return false;
3238       break;
3239     case RISCV::SB:
3240       if (UI.getOperandNo() != 0 || Bits < 8)
3241         return false;
3242       break;
3243     case RISCV::SH:
3244       if (UI.getOperandNo() != 0 || Bits < 16)
3245         return false;
3246       break;
3247     case RISCV::SW:
3248       if (UI.getOperandNo() != 0 || Bits < 32)
3249         return false;
3250       break;
3251     }
3252   }
3253 
3254   return true;
3255 }
3256 
3257 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
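// Illustrative example: 96 is encodable as 12 << 3 (Simm5 = 12, Shl2 = 3),
// and -48 as -12 << 2; a value such as 1000 is rejected because no 2-bit
// shift of a simm5 can produce it.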
3258 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3259                                         SDValue &Shl2) {
3260   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3261     int64_t Offset = C->getSExtValue();
3262     int64_t Shift;
3263     for (Shift = 0; Shift < 4; Shift++)
3264       if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3265         break;
3266 
3267     // Constant cannot be encoded.
3268     if (Shift == 4)
3269       return false;
3270 
3271     EVT Ty = N->getValueType(0);
3272     Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3273     Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3274     return true;
3275   }
3276 
3277   return false;
3278 }
3279 
3280 // Select VL as a 5-bit immediate or a value that will become a register. This
3281 // allows us to choose between VSETIVLI or VSETVLI later.
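// Illustrative example: a constant VL of 17 fits in 5 bits and can later be
// emitted as "vsetivli zero, 17, ...", while an all-ones constant or the X0
// register maps to VLMaxSentinel and becomes "vsetvli zero, zero, ...".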
3282 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3283   auto *C = dyn_cast<ConstantSDNode>(N);
3284   if (C && isUInt<5>(C->getZExtValue())) {
3285     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3286                                    N->getValueType(0));
3287   } else if (C && C->isAllOnes()) {
3288     // Treat all ones as VLMax.
3289     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3290                                    N->getValueType(0));
3291   } else if (isa<RegisterSDNode>(N) &&
3292              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3293     // All our VL operands use an operand that allows GPRNoX0 or an immediate
3294     // as the register class. Convert X0 to a special immediate to pass the
3295     // MachineVerifier. This is recognized specially by the vsetvli insertion
3296     // pass.
3297     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3298                                    N->getValueType(0));
3299   } else {
3300     VL = N;
3301   }
3302 
3303   return true;
3304 }
3305 
3306 static SDValue findVSplat(SDValue N) {
3307   if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3308     if (!N.getOperand(0).isUndef())
3309       return SDValue();
3310     N = N.getOperand(1);
3311   }
3312   SDValue Splat = N;
3313   if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3314        Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3315       !Splat.getOperand(0).isUndef())
3316     return SDValue();
3317   assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3318   return Splat;
3319 }
3320 
3321 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3322   SDValue Splat = findVSplat(N);
3323   if (!Splat)
3324     return false;
3325 
3326   SplatVal = Splat.getOperand(1);
3327   return true;
3328 }
3329 
3330 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3331                                   SelectionDAG &DAG,
3332                                   const RISCVSubtarget &Subtarget,
3333                                   std::function<bool(int64_t)> ValidateImm) {
3334   SDValue Splat = findVSplat(N);
3335   if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3336     return false;
3337 
3338   const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3339   assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3340          "Unexpected splat operand type");
3341 
3342   // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
3343   // type is wider than the resulting vector element type: an implicit
3344   // truncation first takes place. Therefore, perform a manual
3345   // truncation/sign-extension in order to ignore any truncated bits and catch
3346   // any zero-extended immediate.
3347   // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3348   // sign-extending to (XLenVT -1).
3349   APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3350 
3351   int64_t SplatImm = SplatConst.getSExtValue();
3352 
3353   if (!ValidateImm(SplatImm))
3354     return false;
3355 
3356   SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3357   return true;
3358 }
3359 
3360 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3361   return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3362                                [](int64_t Imm) { return isInt<5>(Imm); });
3363 }
3364 
3365 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3366   return selectVSplatImmHelper(
3367       N, SplatVal, *CurDAG, *Subtarget,
3368       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3369 }
3370 
3371 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3372                                                       SDValue &SplatVal) {
3373   return selectVSplatImmHelper(
3374       N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3375         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3376       });
3377 }
3378 
3379 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3380                                          SDValue &SplatVal) {
3381   return selectVSplatImmHelper(
3382       N, SplatVal, *CurDAG, *Subtarget,
3383       [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3384 }
3385 
3386 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3387   auto IsExtOrTrunc = [](SDValue N) {
3388     switch (N->getOpcode()) {
3389     case ISD::SIGN_EXTEND:
3390     case ISD::ZERO_EXTEND:
3391     // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3392     // inactive elements will be undef.
3393     case RISCVISD::TRUNCATE_VECTOR_VL:
3394     case RISCVISD::VSEXT_VL:
3395     case RISCVISD::VZEXT_VL:
3396       return true;
3397     default:
3398       return false;
3399     }
3400   };
3401 
3402   // We can have multiple nested nodes, so unravel them all if needed.
3403   while (IsExtOrTrunc(N)) {
3404     if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3405       return false;
3406     N = N->getOperand(0);
3407   }
3408 
3409   return selectVSplat(N, SplatVal);
3410 }
3411 
3412 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3413   ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3414   if (!CFP)
3415     return false;
3416   const APFloat &APF = CFP->getValueAPF();
3417   // td can handle +0.0 already.
3418   if (APF.isPosZero())
3419     return false;
3420 
3421   MVT VT = CFP->getSimpleValueType(0);
3422 
3423   // Even if this FPImm requires an additional FNEG (i.e. the second element of
3424   // the returned pair is true) we still prefer FLI + FNEG over immediate
3425   // materialization as the latter might generate a longer instruction sequence.
3426   if (static_cast<const RISCVTargetLowering *>(TLI)
3427           ->getLegalZfaFPImm(APF, VT)
3428           .first >= 0)
3429     return false;
3430 
3431   MVT XLenVT = Subtarget->getXLenVT();
3432   if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3433     assert(APF.isNegZero() && "Unexpected constant.");
3434     return false;
3435   }
3436   SDLoc DL(N);
3437   Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3438                   *Subtarget);
3439   return true;
3440 }
3441 
3442 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3443                                        SDValue &Imm) {
3444   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3445     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3446 
3447     if (!isInt<5>(ImmVal))
3448       return false;
3449 
3450     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3451     return true;
3452   }
3453 
3454   return false;
3455 }
3456 
3457 // Try to remove sext.w if the input is a W instruction or can be made into
3458 // a W instruction cheaply.
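// Illustrative example (register names are arbitrary):
//   add   a0, a1, a2
//   addiw a3, a0, 0          ; sext.w of the add
// can have the addiw replaced by an independent
//   addw  a3, a1, a2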
3459 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3460   // Look for the sext.w pattern, addiw rd, rs1, 0.
3461   if (N->getMachineOpcode() != RISCV::ADDIW ||
3462       !isNullConstant(N->getOperand(1)))
3463     return false;
3464 
3465   SDValue N0 = N->getOperand(0);
3466   if (!N0.isMachineOpcode())
3467     return false;
3468 
3469   switch (N0.getMachineOpcode()) {
3470   default:
3471     break;
3472   case RISCV::ADD:
3473   case RISCV::ADDI:
3474   case RISCV::SUB:
3475   case RISCV::MUL:
3476   case RISCV::SLLI: {
3477     // Convert sext.w+add/sub/mul to their W instructions. This will create
3478     // a new independent instruction. This improves latency.
3479     unsigned Opc;
3480     switch (N0.getMachineOpcode()) {
3481     default:
3482       llvm_unreachable("Unexpected opcode!");
3483     case RISCV::ADD:  Opc = RISCV::ADDW;  break;
3484     case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3485     case RISCV::SUB:  Opc = RISCV::SUBW;  break;
3486     case RISCV::MUL:  Opc = RISCV::MULW;  break;
3487     case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3488     }
3489 
3490     SDValue N00 = N0.getOperand(0);
3491     SDValue N01 = N0.getOperand(1);
3492 
3493     // Shift amount needs to be uimm5.
3494     if (N0.getMachineOpcode() == RISCV::SLLI &&
3495         !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3496       break;
3497 
3498     SDNode *Result =
3499         CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3500                                N00, N01);
3501     ReplaceUses(N, Result);
3502     return true;
3503   }
3504   case RISCV::ADDW:
3505   case RISCV::ADDIW:
3506   case RISCV::SUBW:
3507   case RISCV::MULW:
3508   case RISCV::SLLIW:
3509   case RISCV::PACKW:
3510   case RISCV::TH_MULAW:
3511   case RISCV::TH_MULAH:
3512   case RISCV::TH_MULSW:
3513   case RISCV::TH_MULSH:
3514     if (N0.getValueType() == MVT::i32)
3515       break;
3516 
3517     // Result is already sign extended; just remove the sext.w.
3518     // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3519     ReplaceUses(N, N0.getNode());
3520     return true;
3521   }
3522 
3523   return false;
3524 }
3525 
3526 // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3527 // that's glued to the pseudo. This tries to look up the value that was copied
3528 // to V0.
3529 static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3530   // Check that we're using V0 as a mask register.
3531   if (!isa<RegisterSDNode>(MaskOp) ||
3532       cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3533     return SDValue();
3534 
3535   // The glued user defines V0.
3536   const auto *Glued = GlueOp.getNode();
3537 
3538   if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3539     return SDValue();
3540 
3541   // Check that we're defining V0 as a mask register.
3542   if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3543       cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3544     return SDValue();
3545 
3546   SDValue MaskSetter = Glued->getOperand(2);
3547 
3548   // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3549   // from an extract_subvector or insert_subvector.
3550   if (MaskSetter->isMachineOpcode() &&
3551       MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3552     MaskSetter = MaskSetter->getOperand(0);
3553 
3554   return MaskSetter;
3555 }
3556 
3557 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3558   // Check the instruction defining V0; it needs to be a VMSET pseudo.
3559   SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3560   if (!MaskSetter)
3561     return false;
3562 
3563   const auto IsVMSet = [](unsigned Opc) {
3564     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3565            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3566            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3567            Opc == RISCV::PseudoVMSET_M_B8;
3568   };
3569 
3570   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3571   // undefined behaviour if it's the wrong bitwidth, so we could choose to
3572   // assume that it's all-ones? Same applies to its VL.
3573   return MaskSetter->isMachineOpcode() &&
3574          IsVMSet(MaskSetter.getMachineOpcode());
3575 }
3576 
3577 // Return true if we can make sure mask of N is all-ones mask.
3578 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3579   return usesAllOnesMask(N->getOperand(MaskOpIdx),
3580                          N->getOperand(N->getNumOperands() - 1));
3581 }
3582 
3583 static bool isImplicitDef(SDValue V) {
3584   if (!V.isMachineOpcode())
3585     return false;
3586   if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3587     for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3588       if (!isImplicitDef(V.getOperand(I)))
3589         return false;
3590     return true;
3591   }
3592   return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3593 }
3594 
3595 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
3596 // corresponding "unmasked" pseudo versions. The mask we're interested in will
3597 // take the form of a V0 physical register operand, with a glued
3598 // register-setting instruction.
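// Illustrative example: a PseudoVADD_VV_M1_MASK whose V0 operand was defined
// by a PseudoVMSET_M_B* pseudo can be rewritten as the unmasked
// PseudoVADD_VV_M1, dropping the mask operand and its glued CopyToReg.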
3599 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3600   const RISCV::RISCVMaskedPseudoInfo *I =
3601       RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3602   if (!I)
3603     return false;
3604 
3605   unsigned MaskOpIdx = I->MaskOpIdx;
3606   if (!usesAllOnesMask(N, MaskOpIdx))
3607     return false;
3608 
3609   // There are two classes of pseudos in the table - compares and
3610   // everything else.  See the comment on RISCVMaskedPseudo for details.
3611   const unsigned Opc = I->UnmaskedPseudo;
3612   const MCInstrDesc &MCID = TII->get(Opc);
3613   const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3614 #ifndef NDEBUG
3615   const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3616   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3617          RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3618          "Masked and unmasked pseudos are inconsistent");
3619   const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3620   assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3621 #endif
3622 
3623   SmallVector<SDValue, 8> Ops;
3624   // Skip the merge operand at index 0 if !UseTUPseudo.
3625   for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3626     // Skip the mask, and the Glue.
3627     SDValue Op = N->getOperand(I);
3628     if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3629       continue;
3630     Ops.push_back(Op);
3631   }
3632 
3633   // Transitively apply any node glued to our new node.
3634   const auto *Glued = N->getGluedNode();
3635   if (auto *TGlued = Glued->getGluedNode())
3636     Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3637 
3638   MachineSDNode *Result =
3639       CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3640 
3641   if (!N->memoperands_empty())
3642     CurDAG->setNodeMemRefs(Result, N->memoperands());
3643 
3644   Result->setFlags(N->getFlags());
3645   ReplaceUses(N, Result);
3646 
3647   return true;
3648 }
3649 
3650 static bool IsVMerge(SDNode *N) {
3651   return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3652 }
3653 
3654 static bool IsVMv(SDNode *N) {
3655   return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3656 }
3657 
3658 static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3659   switch (LMUL) {
3660   case RISCVII::LMUL_F8:
3661     return RISCV::PseudoVMSET_M_B1;
3662   case RISCVII::LMUL_F4:
3663     return RISCV::PseudoVMSET_M_B2;
3664   case RISCVII::LMUL_F2:
3665     return RISCV::PseudoVMSET_M_B4;
3666   case RISCVII::LMUL_1:
3667     return RISCV::PseudoVMSET_M_B8;
3668   case RISCVII::LMUL_2:
3669     return RISCV::PseudoVMSET_M_B16;
3670   case RISCVII::LMUL_4:
3671     return RISCV::PseudoVMSET_M_B32;
3672   case RISCVII::LMUL_8:
3673     return RISCV::PseudoVMSET_M_B64;
3674   case RISCVII::LMUL_RESERVED:
3675     llvm_unreachable("Unexpected LMUL");
3676   }
3677   llvm_unreachable("Unknown VLMUL enum");
3678 }
3679 
3680 // Try to fold away VMERGE_VVM instructions into their true operands:
3681 //
3682 // %true = PseudoVADD_VV ...
3683 // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3684 // ->
3685 // %x = PseudoVADD_VV_MASK %false, ..., %mask
3686 //
3687 // We can only fold if vmerge's merge operand, vmerge's false operand and
3688 // %true's merge operand (if it has one) are the same. This is because we have
3689 // to consolidate them into one merge operand in the result.
3690 //
3691 // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3692 // mask is all ones.
3693 //
3694 // We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3695 // VMERGE_VVM with an all ones mask.
3696 //
3697 // The resulting VL is the minimum of the two VLs.
3698 //
3699 // The resulting policy is the effective policy the vmerge would have had,
3700 // i.e. whether or not its merge operand was implicit-def.
3701 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3702   SDValue Merge, False, True, VL, Mask, Glue;
3703   // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3704   if (IsVMv(N)) {
3705     Merge = N->getOperand(0);
3706     False = N->getOperand(0);
3707     True = N->getOperand(1);
3708     VL = N->getOperand(2);
3709     // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3710     // mask later below.
3711   } else {
3712     assert(IsVMerge(N));
3713     Merge = N->getOperand(0);
3714     False = N->getOperand(1);
3715     True = N->getOperand(2);
3716     Mask = N->getOperand(3);
3717     VL = N->getOperand(4);
3718     // We always have a glue node for the mask at v0.
3719     Glue = N->getOperand(N->getNumOperands() - 1);
3720   }
3721   assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3722   assert(!Glue || Glue.getValueType() == MVT::Glue);
3723 
3724   // If the EEW of True is different from vmerge's SEW, then we can't fold.
3725   if (True.getSimpleValueType() != N->getSimpleValueType(0))
3726     return false;
3727 
3728   // We require that either merge and false are the same, or that merge
3729   // is undefined.
3730   if (Merge != False && !isImplicitDef(Merge))
3731     return false;
3732 
3733   assert(True.getResNo() == 0 &&
3734          "Expect True is the first output of an instruction.");
3735 
3736   // N must be the only user of True.
3737   if (!True.hasOneUse())
3738     return false;
3739 
3740   if (!True.isMachineOpcode())
3741     return false;
3742 
3743   unsigned TrueOpc = True.getMachineOpcode();
3744   const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3745   uint64_t TrueTSFlags = TrueMCID.TSFlags;
3746   bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3747 
3748   bool IsMasked = false;
3749   const RISCV::RISCVMaskedPseudoInfo *Info =
3750       RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3751   if (!Info && HasTiedDest) {
3752     Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3753     IsMasked = true;
3754   }
3755   assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3756 
3757   if (!Info)
3758     return false;
3759 
3760   // If True has a merge operand then it needs to be the same as vmerge's False,
3761   // since False will be used for the result's merge operand.
3762   if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3763     SDValue MergeOpTrue = True->getOperand(0);
3764     if (False != MergeOpTrue)
3765       return false;
3766   }
3767 
3768   // If True is masked then the vmerge must have either the same mask or an all
3769   // 1s mask, since we're going to keep the mask from True.
3770   if (IsMasked && Mask) {
3771     // FIXME: Support mask agnostic True instruction which would have an
3772     // undef merge operand.
3773     SDValue TrueMask =
3774         getMaskSetter(True->getOperand(Info->MaskOpIdx),
3775                       True->getOperand(True->getNumOperands() - 1));
3776     assert(TrueMask);
3777     if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3778       return false;
3779   }
3780 
3781   // Skip if True has side effects.
3782   if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3783     return false;
3784 
3785   // The last operand of a masked instruction may be glued.
3786   bool HasGlueOp = True->getGluedNode() != nullptr;
3787 
3788   // The chain operand may exist either before the glued operands or in the last
3789   // position.
3790   unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3791   bool HasChainOp =
3792       True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3793 
3794   if (HasChainOp) {
3795     // Avoid creating cycles in the DAG. We must ensure that none of the other
3796     // operands depend on True through its Chain.
3797     SmallVector<const SDNode *, 4> LoopWorklist;
3798     SmallPtrSet<const SDNode *, 16> Visited;
3799     LoopWorklist.push_back(False.getNode());
3800     if (Mask)
3801       LoopWorklist.push_back(Mask.getNode());
3802     LoopWorklist.push_back(VL.getNode());
3803     if (Glue)
3804       LoopWorklist.push_back(Glue.getNode());
3805     if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3806       return false;
3807   }
3808 
3809   // The vector policy operand may be present for masked intrinsics
3810   bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3811   unsigned TrueVLIndex =
3812       True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3813   SDValue TrueVL = True.getOperand(TrueVLIndex);
3814   SDValue SEW = True.getOperand(TrueVLIndex + 1);
3815 
3816   auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3817     if (LHS == RHS)
3818       return LHS;
3819     if (isAllOnesConstant(LHS))
3820       return RHS;
3821     if (isAllOnesConstant(RHS))
3822       return LHS;
3823     auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3824     auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3825     if (!CLHS || !CRHS)
3826       return SDValue();
3827     return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3828   };
3829 
3830   // Because N and True must have the same merge operand (or True's operand is
3831   // implicit_def), the "effective" body is the minimum of their VLs.
3832   SDValue OrigVL = VL;
3833   VL = GetMinVL(TrueVL, VL);
3834   if (!VL)
3835     return false;
3836 
3837   // Some operations produce different elementwise results depending on the
3838   // active elements, like viota.m or vredsum. This transformation is illegal
3839   // for these if we change the active elements (i.e. mask or VL).
3840   if (Info->ActiveElementsAffectResult) {
3841     if (Mask && !usesAllOnesMask(Mask, Glue))
3842       return false;
3843     if (TrueVL != VL)
3844       return false;
3845   }
3846 
3847   // If we end up changing the VL or mask of True, then we need to make sure it
3848   // doesn't raise any observable fp exceptions, since changing the active
3849   // elements will affect how fflags is set.
3850   if (TrueVL != VL || !IsMasked)
3851     if (mayRaiseFPException(True.getNode()) &&
3852         !True->getFlags().hasNoFPExcept())
3853       return false;
3854 
3855   SDLoc DL(N);
3856 
3857   // From the preconditions we checked above, we know the mask and thus glue
3858   // for the result node will be taken from True.
3859   if (IsMasked) {
3860     Mask = True->getOperand(Info->MaskOpIdx);
3861     Glue = True->getOperand(True->getNumOperands() - 1);
3862     assert(Glue.getValueType() == MVT::Glue);
3863   }
3864   // If the vmerge is actually a vmv.v.v (which carries no mask operand),
3865   // create an all-ones mask to use.
3866   else if (IsVMv(N)) {
3867     unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3868     unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3869     ElementCount EC = N->getValueType(0).getVectorElementCount();
3870     MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3871 
3872     SDValue AllOnesMask =
3873         SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3874     SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3875                                             RISCV::V0, AllOnesMask, SDValue());
3876     Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3877     Glue = MaskCopy.getValue(1);
3878   }
3879 
3880   unsigned MaskedOpc = Info->MaskedPseudo;
3881 #ifndef NDEBUG
3882   const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3883   assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3884          "Expected instructions with mask have policy operand.");
3885   assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3886                                          MCOI::TIED_TO) == 0 &&
3887          "Expected instructions with mask have a tied dest.");
3888 #endif
3889 
3890   // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3891   // operand is undefined.
3892   //
3893   // However, if the VL became smaller than what the vmerge had originally, then
3894   // elements past VL that were previously in the vmerge's body will have moved
3895   // to the tail. In that case we always need to use tail undisturbed to
3896   // preserve them.
3897   bool MergeVLShrunk = VL != OrigVL;
3898   uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3899                         ? RISCVII::TAIL_AGNOSTIC
3900                         : /*TUMU*/ 0;
3901   SDValue PolicyOp =
3902     CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3903 
3904 
3905   SmallVector<SDValue, 8> Ops;
3906   Ops.push_back(False);
3907 
3908   const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3909   const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3910   assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3911   Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3912 
3913   Ops.push_back(Mask);
3914 
3915   // For an unmasked "VOp" with a rounding mode operand, the interface looks like
3916   // (..., rm, vl) or (..., rm, vl, policy).
3917   // Its masked version is (..., vm, rm, vl, policy).
3918   // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td
3919   if (HasRoundingMode)
3920     Ops.push_back(True->getOperand(TrueVLIndex - 1));
3921 
3922   Ops.append({VL, SEW, PolicyOp});
3923 
3924   // Result node should have chain operand of True.
3925   if (HasChainOp)
3926     Ops.push_back(True.getOperand(TrueChainOpIdx));
3927 
3928   // Add the glue for the CopyToReg of mask->v0.
3929   Ops.push_back(Glue);
3930 
3931   MachineSDNode *Result =
3932       CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3933   Result->setFlags(True->getFlags());
3934 
3935   if (!cast<MachineSDNode>(True)->memoperands_empty())
3936     CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3937 
3938   // Replace vmerge.vvm node by Result.
3939   ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3940 
3941   // Replace the other values of True, e.g. the chain and VL outputs.
3942   for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3943     ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3944 
3945   return true;
3946 }
3947 
3948 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3949   bool MadeChange = false;
3950   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3951 
3952   while (Position != CurDAG->allnodes_begin()) {
3953     SDNode *N = &*--Position;
3954     if (N->use_empty() || !N->isMachineOpcode())
3955       continue;
3956 
3957     if (IsVMerge(N) || IsVMv(N))
3958       MadeChange |= performCombineVMergeAndVOps(N);
3959   }
3960   return MadeChange;
3961 }
3962 
3963 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
3964 /// issues with MachineCSE not being able to CSE expressions with
3965 /// IMPLICIT_DEF operands while preserving the semantic intent. See
3966 /// pr64282 for context. Note that this transform is the last one
3967 /// performed at ISEL DAG to DAG.
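/// Illustrative example: a PseudoVADD_VV_M1 whose tied passthru operand is an
/// IMPLICIT_DEF has that operand rewritten to NoRegister, allowing MachineCSE
/// to merge it with an otherwise identical vector add.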
3968 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3969   bool MadeChange = false;
3970   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3971 
3972   while (Position != CurDAG->allnodes_begin()) {
3973     SDNode *N = &*--Position;
3974     if (N->use_empty() || !N->isMachineOpcode())
3975       continue;
3976 
3977     const unsigned Opc = N->getMachineOpcode();
3978     if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3979         !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3980         !isImplicitDef(N->getOperand(0)))
3981       continue;
3982 
3983     SmallVector<SDValue> Ops;
3984     Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3985     for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3986       SDValue Op = N->getOperand(I);
3987       Ops.push_back(Op);
3988     }
3989 
3990     MachineSDNode *Result =
3991       CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3992     Result->setFlags(N->getFlags());
3993     CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3994     ReplaceUses(N, Result);
3995     MadeChange = true;
3996   }
3997   return MadeChange;
3998 }
3999 
4000 
4001 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
4002 // for instruction scheduling.
4003 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4004                                        CodeGenOptLevel OptLevel) {
4005   return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4006 }
4007 
4008 char RISCVDAGToDAGISelLegacy::ID = 0;
4009 
4010 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4011                                                  CodeGenOptLevel OptLevel)
4012     : SelectionDAGISelLegacy(
4013           ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4014 
4015 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4016