xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISC-V target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMCTargetDesc.h"
16 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVISelLowering.h"
18 #include "RISCVInstrInfo.h"
19 #include "RISCVSelectionDAGInfo.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/IR/IntrinsicsRISCV.h"
22 #include "llvm/Support/Alignment.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "riscv-isel"
30 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
31 
// Escape hatch for constant materialization: when set, two-instruction
// constants are emitted as a single rematerializable PseudoMovImm instead of
// the expanded two-instruction sequence (see selectImm below).
static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));
37 
38 #define GET_DAGISEL_BODY RISCVDAGToDAGISel
39 #include "RISCVGenDAGISel.inc"
40 
// Pre-isel DAG rewrites: walk all nodes (reverse allnodes order) and replace
// a few opcodes with RVV-specific forms that the tablegen patterns can match
// directly. Dead nodes created by the replacements are removed in bulk at the
// end.
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes; RemoveDeadNodes below cleans them up.
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      // X0 in the VL position requests VLMAX (cf. the VLMAX variable in the
      // FP_EXTEND case below).
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      // Integer splats take an XLen-wide scalar; widen narrower sources.
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      // Both halves are stored off the entry chain; the TokenFactor below
      // orders the vector load after both stores.
      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      // X0 in the stride position gives the stride-0 (broadcast) load
      // mentioned above.
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    case ISD::FP_EXTEND: {
      // We only have vector patterns for riscv_fpextend_vl in isel.
      SDLoc DL(N);
      MVT VT = N->getSimpleValueType(0);
      // Scalar FP_EXTEND is handled elsewhere; leave it alone.
      if (!VT.isVector())
        break;
      SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      // All-ones mask: the extend is unpredicated.
      SDValue TrueMask = CurDAG->getNode(
          RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
      Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
                               TrueMask, VLMAX);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld:    ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
144 
// Post-isel cleanup: run the per-node peepholes (doPeepholeSExtW,
// doPeepholeMaskedRVV) over the selected machine-node DAG, then rewrite
// IMPLICIT_DEF passthru operands to NoRegister (see comment below).
void RISCVDAGToDAGISel::PostprocessISelDAG() {
  // Keep the root alive while nodes are being replaced underneath it.
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs.  Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister.  This is required to workaround
  // an optimization deficiency in MachineCSE.  This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
177 
selectImmSeq(SelectionDAG * CurDAG,const SDLoc & DL,const MVT VT,RISCVMatInt::InstSeq & Seq)178 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
179                             RISCVMatInt::InstSeq &Seq) {
180   SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
181   for (const RISCVMatInt::Inst &Inst : Seq) {
182     SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
183     SDNode *Result = nullptr;
184     switch (Inst.getOpndKind()) {
185     case RISCVMatInt::Imm:
186       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
187       break;
188     case RISCVMatInt::RegX0:
189       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
190                                       CurDAG->getRegister(RISCV::X0, VT));
191       break;
192     case RISCVMatInt::RegReg:
193       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
194       break;
195     case RISCVMatInt::RegImm:
196       Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
197       break;
198     }
199 
200     // Only the first instruction has X0 as its source.
201     SrcReg = SDValue(Result, 0);
202   }
203 
204   return SrcReg;
205 }
206 
// Materialize the constant \p Imm of type \p VT. Normally emits the
// RISCVMatInt sequence; may instead use a rematerializable PseudoMovImm
// (opt-in via -riscv-use-rematerializable-movimm) or a shorter two-register
// (ADD[_UW] (SLLI X, C), X) form when that saves instructions.
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    // Only profitable when materializing Lo plus the SLLI+ADD pair is
    // strictly shorter than the full sequence.
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}
241 
// Append the common operand tail shared by the RVV load/store pseudos to
// \p Operands, in this fixed order: base pointer, [stride/index], [mask],
// VL, SEW, [policy — loads only], chain. \p CurOp is the index of the base
// pointer operand on \p Node. If \p IndexVT is non-null it receives the type
// of the stride/index operand.
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    SDValue Mask = Node->getOperand(CurOp++);
    Operands.push_back(Mask);
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do.  All have passthru operands.  For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
}
281 
// Select a (possibly masked and/or strided) segment-load intrinsic into the
// matching VLSEG pseudo. The pseudo yields an Untyped tuple result plus a
// chain, which replace the intrinsic's two results.
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  // Log2SEW is carried as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operand 0 is the chain, 1 the intrinsic ID; operand 2 is the first
  // intrinsic argument (the passthru — see addVectorLoadStoreOperands).
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  // Carry the intrinsic's memory operand over to the new machine node.
  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Tuple result.
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // Chain.
  CurDAG->RemoveDeadNode(Node);
}
309 
// Select a fault-only-first segment-load intrinsic into its VLSEG*FF pseudo.
// Unlike selectVLSEG, the pseudo also produces the updated VL as an XLenVT
// result between the tuple and the chain.
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  // Log2SEW is carried as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operand 0 is the chain, 1 the intrinsic ID; operand 2 is the first
  // intrinsic argument (the passthru — see addVectorLoadStoreOperands).
  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  // Carry the intrinsic's memory operand over to the new machine node.
  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}
340 
// Select an indexed (ordered or unordered) segment-load intrinsic into its
// VLXSEG pseudo. Aborts compilation for EEW=64 index values on RV32, which
// the V extension does not allow.
void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  // Log2SEW is carried as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  // Operand 0 is the chain, 1 the intrinsic ID; operand 2 is the first
  // intrinsic argument (the passthru — see addVectorLoadStoreOperands).
  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

#ifndef NDEBUG
  // Sanity check: data and index vectors must have the same element count.
  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  // Carry the intrinsic's memory operand over to the new machine node.
  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Tuple result.
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // Chain.
  CurDAG->RemoveDeadNode(Node);
}
388 
// Select a (possibly masked and/or strided) segment-store intrinsic into the
// matching VSSEG pseudo. The only result is the chain.
void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  // Operand 2 holds the stored tuple; derive VT/LMUL from it rather than
  // from the node's (chain-only) results.
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  // Log2SEW is carried as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // Store-data (tuple) operand.
  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Carry the intrinsic's memory operand over to the new machine node.
  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}
413 
// Select an indexed (ordered or unordered) segment-store intrinsic into its
// VSXSEG pseudo. Aborts compilation for EEW=64 index values on RV32, which
// the V extension does not allow.
void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  // Operand 2 holds the stored tuple; derive VT/LMUL from it.
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  // Log2SEW is carried as the intrinsic's last operand.
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  // Store-data (tuple) operand.
  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

#ifndef NDEBUG
  // Sanity check: data and index vectors must have the same element count.
  // Number of element = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  // Carry the intrinsic's memory operand over to the new machine node.
  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}
459 
// Select the riscv_vsetvli / riscv_vsetvlimax intrinsics into the vsetvli
// pseudos: PseudoVSETIVLI for small constant AVLs, PseudoVSETVLIX0 for
// VLMAX-like AVLs, and PseudoVSETVLI otherwise.
void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  // vsetvlimax has no AVL operand, so its SEW/LMUL operands start one slot
  // earlier than vsetvli's.
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  // The intrinsics always request tail-agnostic/mask-agnostic vtype.
  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  // If the exact VLEN is known and the constant AVL equals VLMAX for this
  // SEW/LMUL ratio, treat the request as a VLMAX request.
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    // VLMAX: use the X0-AVL form of the pseudo.
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      // Constant AVLs that fit vsetivli's 5-bit unsigned immediate use the
      // immediate form.
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}
517 
// For ((x << C1) op C2) where op is AND/OR/XOR and C2 does not fit a 12-bit
// immediate but (C2 >> C1) does, rewrite to ((x op (C2 >> C1)) << C1) so the
// logic op can use ANDI/ORI/XORI. Returns true if the node was replaced.
bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  // The shift's only user must be this logic op, or the rewrite would
  // duplicate work.
  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp = CurDAG->getMachineNode(
      BinOpc, DL, VT, Shift.getOperand(0),
      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}
595 
// Try to select a right-shift-by-constant node as a vendor signed
// bitfield-extract instruction (TH.EXT / NDS.BFOS / QC.EXT). Matches
// (sra (shl X, C1), C2) and (sra (sext_inreg X, _), C) shapes. Returns true
// if the node was replaced.
bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb())
    Opc = RISCV::TH_EXT;
  else if (Subtarget->hasVendorXAndesPerf())
    Opc = RISCV::NDS_BFOS;
  else if (Subtarget->hasVendorXqcibm())
    Opc = RISCV::QC_EXT;
  else
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  // Build the extract node from (Msb, Lsb) bit positions; QC.EXT encodes
  // (width, shamt) instead, so Msb is converted to a width there.
  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                             const SDLoc &DL, MVT VT) {
    if (Opc == RISCV::QC_EXT) {
      // QC.EXT X, width, shamt
      // shamt is the same as Lsb
      // width is the number of bits to extract from the Lsb
      Msb = Msb - Lsb + 1;
    }
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount can not be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, it means that extracts
    // the X[Msb] bit and sign-extend it.
    const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  return false;
}
678 
trySignedBitfieldInsertInMask(SDNode * Node)679 bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
680   // Supported only in Xqcibm for now.
681   if (!Subtarget->hasVendorXqcibm())
682     return false;
683 
684   auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
685   if (!N1C)
686     return false;
687 
688   int32_t C1 = N1C->getSExtValue();
689   if (!isShiftedMask_32(C1) || isInt<12>(C1))
690     return false;
691 
692   // INSBI will clobber the input register in N0. Bail out if we need a copy to
693   // preserve this value.
694   SDValue N0 = Node->getOperand(0);
695   if (!N0.hasOneUse())
696     return false;
697 
698   // If C1 is a shifted mask (but can't be formed as an ORI),
699   // use a bitfield insert of -1.
700   // Transform (or x, C1)
701   //        -> (qc.insbi x, -1, width, shift)
702   const unsigned Leading = llvm::countl_zero((uint32_t)C1);
703   const unsigned Trailing = llvm::countr_zero((uint32_t)C1);
704   const unsigned Width = 32 - Leading - Trailing;
705 
706   // If Zbs is enabled and it is a single bit set we can use BSETI which
707   // can be compressed to C_BSETI when Xqcibm in enabled.
708   if (Width == 1 && Subtarget->hasStdExtZbs())
709     return false;
710 
711   SDLoc DL(Node);
712   MVT VT = Node->getSimpleValueType(0);
713 
714   SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT),
715                    CurDAG->getTargetConstant(Width, DL, VT),
716                    CurDAG->getTargetConstant(Trailing, DL, VT)};
717   SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
718   ReplaceNode(Node, BitIns);
719   return true;
720 }
721 
// Try to select (sra (shl X, C1), C2) with C1 > C2 as NDS.BFOS, inserting a
// sign-extended bitfield of X. Only supported with XAndesPerf. Returns true
// if the node was replaced.
bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  // Build the NDS.BFOS node from (Msb, Lsb) bit positions.
  auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                            const SDLoc &DL, MVT VT) {
    unsigned Opc = RISCV::NDS_BFOS;
    // If the Lsb is equal to the Msb, then the Lsb should be 0.
    if (Lsb == Msb)
      Lsb = 0;
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Lsb, DL, VT),
                                  CurDAG->getTargetConstant(Msb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 > C2
  //        -> (NDS.BFOS X, lsb, msb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield insertion (i.e., the shift-right
    // amount should be less than the left-shift).
    if (LeftShAmt <= RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = LeftShAmt - RightShAmt;

    SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbi);
    return true;
  }

  return false;
}
774 
tryUnsignedBitfieldExtract(SDNode * Node,const SDLoc & DL,MVT VT,SDValue X,unsigned Msb,unsigned Lsb)775 bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
776                                                    const SDLoc &DL, MVT VT,
777                                                    SDValue X, unsigned Msb,
778                                                    unsigned Lsb) {
779   unsigned Opc;
780 
781   if (Subtarget->hasVendorXTHeadBb()) {
782     Opc = RISCV::TH_EXTU;
783   } else if (Subtarget->hasVendorXAndesPerf()) {
784     Opc = RISCV::NDS_BFOZ;
785   } else if (Subtarget->hasVendorXqcibm()) {
786     Opc = RISCV::QC_EXTU;
787     // QC.EXTU X, width, shamt
788     // shamt is the same as Lsb
789     // width is the number of bits to extract from the Lsb
790     Msb = Msb - Lsb + 1;
791   } else {
792     // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
793     return false;
794   }
795 
796   SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
797                                        CurDAG->getTargetConstant(Msb, DL, VT),
798                                        CurDAG->getTargetConstant(Lsb, DL, VT));
799   ReplaceNode(Node, Ube);
800   return true;
801 }
802 
tryUnsignedBitfieldInsertInZero(SDNode * Node,const SDLoc & DL,MVT VT,SDValue X,unsigned Msb,unsigned Lsb)803 bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
804                                                         const SDLoc &DL, MVT VT,
805                                                         SDValue X, unsigned Msb,
806                                                         unsigned Lsb) {
807   // Only supported with XAndesPerf at the moment.
808   if (!Subtarget->hasVendorXAndesPerf())
809     return false;
810 
811   unsigned Opc = RISCV::NDS_BFOZ;
812 
813   // If the Lsb is equal to the Msb, then the Lsb should be 0.
814   if (Lsb == Msb)
815     Lsb = 0;
816   SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
817                                        CurDAG->getTargetConstant(Lsb, DL, VT),
818                                        CurDAG->getTargetConstant(Msb, DL, VT));
819   ReplaceNode(Node, Ubi);
820   return true;
821 }
822 
// Try to select a pre/post-increment load as one of the XTHeadMemIdx
// indexed-load instructions. Returns true and replaces Node on success.
bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  // Only constant offsets can be folded into the immediate field.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
  // Find the smallest shift (0..3) such that Offset == simm5 << Shift.
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  // Map (memory type, pre/post, zext/sext) onto the matching TH_L*
  // opcode. i64 loads have no zero-extending form (only valid on RV64,
  // where the load is full-width).
  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  // Operands: base pointer, decoded simm5, shift amount, chain.
  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
  // Results: loaded value, updated base pointer, chain.
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  // Preserve the memory operand so later passes keep alias/volatility info.
  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}
891 
selectSF_VC_X_SE(SDNode * Node)892 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
893   if (!Subtarget->hasVInstructions())
894     return;
895 
896   assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
897 
898   SDLoc DL(Node);
899   unsigned IntNo = Node->getConstantOperandVal(1);
900 
901   assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
902           IntNo == Intrinsic::riscv_sf_vc_i_se) &&
903          "Unexpected vsetvli intrinsic");
904 
905   // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
906   unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
907   SDValue SEWOp =
908       CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
909   SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
910                                       Node->getOperand(4), Node->getOperand(5),
911                                       Node->getOperand(8), SEWOp,
912                                       Node->getOperand(0)};
913 
914   unsigned Opcode;
915   auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
916   switch (LMulSDNode->getSExtValue()) {
917   case 5:
918     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
919                                                   : RISCV::PseudoSF_VC_I_SE_MF8;
920     break;
921   case 6:
922     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
923                                                   : RISCV::PseudoSF_VC_I_SE_MF4;
924     break;
925   case 7:
926     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
927                                                   : RISCV::PseudoSF_VC_I_SE_MF2;
928     break;
929   case 0:
930     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
931                                                   : RISCV::PseudoSF_VC_I_SE_M1;
932     break;
933   case 1:
934     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
935                                                   : RISCV::PseudoSF_VC_I_SE_M2;
936     break;
937   case 2:
938     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
939                                                   : RISCV::PseudoSF_VC_I_SE_M4;
940     break;
941   case 3:
942     Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
943                                                   : RISCV::PseudoSF_VC_I_SE_M8;
944     break;
945   }
946 
947   ReplaceNode(Node, CurDAG->getMachineNode(
948                         Opcode, DL, Node->getSimpleValueType(0), Operands));
949 }
950 
// Return the NF (number of fields) encoded in the name of a RISC-V vector
// segment load/store intrinsic, e.g. riscv_vlseg3 -> 3. Covers NF = 2..8
// for the plain, masked, fault-only-first (ff), and masked
// fault-only-first variants of each segment intrinsic family.
static unsigned getSegInstNF(unsigned Intrinsic) {
// Each *_CASE macro expands to one switch case for a single (name, NF)
// combination; the INST_ALL_* macros stamp them out for NF = 2..8.
#define INST_NF_CASE(NAME, NF)                                                 \
  case Intrinsic::riscv_##NAME##NF:                                            \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF)                                            \
  case Intrinsic::riscv_##NAME##NF##_mask:                                     \
    return NF;
#define INST_NF_CASE_FF(NAME, NF)                                              \
  case Intrinsic::riscv_##NAME##NF##ff:                                        \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF)                                         \
  case Intrinsic::riscv_##NAME##NF##ff_mask:                                   \
    return NF;
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME)                                \
  MACRO_NAME(NAME, 2)                                                          \
  MACRO_NAME(NAME, 3)                                                          \
  MACRO_NAME(NAME, 4)                                                          \
  MACRO_NAME(NAME, 5)                                                          \
  MACRO_NAME(NAME, 6)                                                          \
  MACRO_NAME(NAME, 7)                                                          \
  MACRO_NAME(NAME, 8)
#define INST_ALL_NF_CASE(NAME)                                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME)                                    \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
#define INST_ALL_NF_CASE_WITH_FF(NAME)                                         \
  INST_ALL_NF_CASE(NAME)                                                       \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME)                                 \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    // Only unit-stride loads (vlseg) have fault-only-first variants.
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}
992 
Select(SDNode * Node)993 void RISCVDAGToDAGISel::Select(SDNode *Node) {
994   // If we have a custom node, we have already selected.
995   if (Node->isMachineOpcode()) {
996     LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
997     Node->setNodeId(-1);
998     return;
999   }
1000 
1001   // Instruction Selection not handled by the auto-generated tablegen selection
1002   // should be handled here.
1003   unsigned Opcode = Node->getOpcode();
1004   MVT XLenVT = Subtarget->getXLenVT();
1005   SDLoc DL(Node);
1006   MVT VT = Node->getSimpleValueType(0);
1007 
1008   bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
1009 
1010   switch (Opcode) {
1011   case ISD::Constant: {
1012     assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
1013     auto *ConstNode = cast<ConstantSDNode>(Node);
1014     if (ConstNode->isZero()) {
1015       SDValue New =
1016           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
1017       ReplaceNode(Node, New.getNode());
1018       return;
1019     }
1020     int64_t Imm = ConstNode->getSExtValue();
1021     // If only the lower 8 bits are used, try to convert this to a simm6 by
1022     // sign-extending bit 7. This is neutral without the C extension, and
1023     // allows C.LI to be used if C is present.
1024     if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
1025       Imm = SignExtend64<8>(Imm);
1026     // If the upper XLen-16 bits are not used, try to convert this to a simm12
1027     // by sign extending bit 15.
1028     if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
1029         hasAllHUsers(Node))
1030       Imm = SignExtend64<16>(Imm);
1031     // If the upper 32-bits are not used try to convert this into a simm32 by
1032     // sign extending bit 32.
1033     if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
1034       Imm = SignExtend64<32>(Imm);
1035 
1036     ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
1037     return;
1038   }
1039   case ISD::ConstantFP: {
1040     const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
1041 
1042     bool Is64Bit = Subtarget->is64Bit();
1043     bool HasZdinx = Subtarget->hasStdExtZdinx();
1044 
1045     bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
1046     SDValue Imm;
1047     // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
1048     // create an integer immediate.
1049     if (APF.isPosZero() || NegZeroF64) {
1050       if (VT == MVT::f64 && HasZdinx && !Is64Bit)
1051         Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
1052       else
1053         Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
1054     } else {
1055       Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
1056                       *Subtarget);
1057     }
1058 
1059     unsigned Opc;
1060     switch (VT.SimpleTy) {
1061     default:
1062       llvm_unreachable("Unexpected size");
1063     case MVT::bf16:
1064       assert(Subtarget->hasStdExtZfbfmin());
1065       Opc = RISCV::FMV_H_X;
1066       break;
1067     case MVT::f16:
1068       Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
1069       break;
1070     case MVT::f32:
1071       Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
1072       break;
1073     case MVT::f64:
1074       // For RV32, we can't move from a GPR, we need to convert instead. This
1075       // should only happen for +0.0 and -0.0.
1076       assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
1077       if (HasZdinx)
1078         Opc = RISCV::COPY;
1079       else
1080         Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
1081       break;
1082     }
1083 
1084     SDNode *Res;
1085     if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
1086       Res =
1087           CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
1088     } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
1089       Res =
1090           CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
1091     } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
1092       Res = CurDAG->getMachineNode(
1093           Opc, DL, VT, Imm,
1094           CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
1095     else
1096       Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1097 
1098     // For f64 -0.0, we need to insert a fneg.d idiom.
1099     if (NegZeroF64) {
1100       Opc = RISCV::FSGNJN_D;
1101       if (HasZdinx)
1102         Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1103       Res =
1104           CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1105     }
1106 
1107     ReplaceNode(Node, Res);
1108     return;
1109   }
1110   case RISCVISD::BuildGPRPair:
1111   case RISCVISD::BuildPairF64: {
1112     if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
1113       break;
1114 
1115     assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
1116            "BuildPairF64 only handled here on rv32i_zdinx");
1117 
1118     SDValue Ops[] = {
1119         CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1120         Node->getOperand(0),
1121         CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1122         Node->getOperand(1),
1123         CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1124 
1125     SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
1126     ReplaceNode(Node, N);
1127     return;
1128   }
1129   case RISCVISD::SplitGPRPair:
1130   case RISCVISD::SplitF64: {
1131     if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
1132       assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
1133              "SplitF64 only handled here on rv32i_zdinx");
1134 
1135       if (!SDValue(Node, 0).use_empty()) {
1136         SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1137                                                     Node->getValueType(0),
1138                                                     Node->getOperand(0));
1139         ReplaceUses(SDValue(Node, 0), Lo);
1140       }
1141 
1142       if (!SDValue(Node, 1).use_empty()) {
1143         SDValue Hi = CurDAG->getTargetExtractSubreg(
1144             RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
1145         ReplaceUses(SDValue(Node, 1), Hi);
1146       }
1147 
1148       CurDAG->RemoveDeadNode(Node);
1149       return;
1150     }
1151 
1152     assert(Opcode != RISCVISD::SplitGPRPair &&
1153            "SplitGPRPair should already be handled");
1154 
1155     if (!Subtarget->hasStdExtZfa())
1156       break;
1157     assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1158            "Unexpected subtarget");
1159 
1160     // With Zfa, lower to fmv.x.w and fmvh.x.d.
1161     if (!SDValue(Node, 0).use_empty()) {
1162       SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1163                                           Node->getOperand(0));
1164       ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1165     }
1166     if (!SDValue(Node, 1).use_empty()) {
1167       SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1168                                           Node->getOperand(0));
1169       ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1170     }
1171 
1172     CurDAG->RemoveDeadNode(Node);
1173     return;
1174   }
1175   case ISD::SHL: {
1176     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1177     if (!N1C)
1178       break;
1179     SDValue N0 = Node->getOperand(0);
1180     if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1181         !isa<ConstantSDNode>(N0.getOperand(1)))
1182       break;
1183     unsigned ShAmt = N1C->getZExtValue();
1184     uint64_t Mask = N0.getConstantOperandVal(1);
1185 
1186     if (isShiftedMask_64(Mask)) {
1187       unsigned XLen = Subtarget->getXLen();
1188       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1189       unsigned TrailingZeros = llvm::countr_zero(Mask);
1190       if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
1191         // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
1192         // where C2 has 32 leading zeros and C3 trailing zeros.
1193         SDNode *SRLIW = CurDAG->getMachineNode(
1194             RISCV::SRLIW, DL, VT, N0->getOperand(0),
1195             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1196         SDNode *SLLI = CurDAG->getMachineNode(
1197             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1198             CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1199         ReplaceNode(Node, SLLI);
1200         return;
1201       }
1202       if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
1203           XLen - LeadingZeros > 11 && LeadingZeros != 32) {
1204         // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
1205         // where C2 has C4 leading zeros and no trailing zeros.
1206         // This is profitable if the "and" was to be lowered to
1207         // (srli (slli X, C4), C4) and not (andi X, C2).
1208         // For "LeadingZeros == 32":
1209         // - with Zba it's just (slli.uw X, C)
1210         // - without Zba a tablegen pattern applies the very same
1211         //   transform as we would have done here
1212         SDNode *SLLI = CurDAG->getMachineNode(
1213             RISCV::SLLI, DL, VT, N0->getOperand(0),
1214             CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1215         SDNode *SRLI = CurDAG->getMachineNode(
1216             RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1217             CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
1218         ReplaceNode(Node, SRLI);
1219         return;
1220       }
1221     }
1222     break;
1223   }
1224   case ISD::SRL: {
1225     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1226     if (!N1C)
1227       break;
1228     SDValue N0 = Node->getOperand(0);
1229     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1230       break;
1231     unsigned ShAmt = N1C->getZExtValue();
1232     uint64_t Mask = N0.getConstantOperandVal(1);
1233 
1234     // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1235     // 32 leading zeros and C3 trailing zeros.
1236     if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1237       unsigned XLen = Subtarget->getXLen();
1238       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1239       unsigned TrailingZeros = llvm::countr_zero(Mask);
1240       if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1241         SDNode *SRLIW = CurDAG->getMachineNode(
1242             RISCV::SRLIW, DL, VT, N0->getOperand(0),
1243             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1244         SDNode *SLLI = CurDAG->getMachineNode(
1245             RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1246             CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1247         ReplaceNode(Node, SLLI);
1248         return;
1249       }
1250     }
1251 
1252     // Optimize (srl (and X, C2), C) ->
1253     //          (srli (slli X, (XLen-C3), (XLen-C3) + C)
1254     // Where C2 is a mask with C3 trailing ones.
1255     // Taking into account that the C2 may have had lower bits unset by
1256     // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1257     // This pattern occurs when type legalizing right shifts for types with
1258     // less than XLen bits.
1259     Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1260     if (!isMask_64(Mask))
1261       break;
1262     unsigned TrailingOnes = llvm::countr_one(Mask);
1263     if (ShAmt >= TrailingOnes)
1264       break;
1265     // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1266     if (TrailingOnes == 32) {
1267       SDNode *SRLI = CurDAG->getMachineNode(
1268           Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1269           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1270       ReplaceNode(Node, SRLI);
1271       return;
1272     }
1273 
1274     // Only do the remaining transforms if the AND has one use.
1275     if (!N0.hasOneUse())
1276       break;
1277 
1278     // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1279     if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1280       SDNode *BEXTI = CurDAG->getMachineNode(
1281           Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1282           N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1283       ReplaceNode(Node, BEXTI);
1284       return;
1285     }
1286 
1287     const unsigned Msb = TrailingOnes - 1;
1288     const unsigned Lsb = ShAmt;
1289     if (tryUnsignedBitfieldExtract(Node, DL, VT, N0->getOperand(0), Msb, Lsb))
1290       return;
1291 
1292     unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1293     SDNode *SLLI =
1294         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1295                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1296     SDNode *SRLI = CurDAG->getMachineNode(
1297         RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1298         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1299     ReplaceNode(Node, SRLI);
1300     return;
1301   }
1302   case ISD::SRA: {
1303     if (trySignedBitfieldExtract(Node))
1304       return;
1305 
1306     if (trySignedBitfieldInsertInSign(Node))
1307       return;
1308 
1309     // Optimize (sra (sext_inreg X, i16), C) ->
1310     //          (srai (slli X, (XLen-16), (XLen-16) + C)
1311     // And      (sra (sext_inreg X, i8), C) ->
1312     //          (srai (slli X, (XLen-8), (XLen-8) + C)
1313     // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1314     // This transform matches the code we get without Zbb. The shifts are more
1315     // compressible, and this can help expose CSE opportunities in the sdiv by
1316     // constant optimization.
1317     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1318     if (!N1C)
1319       break;
1320     SDValue N0 = Node->getOperand(0);
1321     if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1322       break;
1323     unsigned ShAmt = N1C->getZExtValue();
1324     unsigned ExtSize =
1325         cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1326     // ExtSize of 32 should use sraiw via tablegen pattern.
1327     if (ExtSize >= 32 || ShAmt >= ExtSize)
1328       break;
1329     unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1330     SDNode *SLLI =
1331         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1332                                CurDAG->getTargetConstant(LShAmt, DL, VT));
1333     SDNode *SRAI = CurDAG->getMachineNode(
1334         RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1335         CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1336     ReplaceNode(Node, SRAI);
1337     return;
1338   }
1339   case ISD::OR: {
1340     if (trySignedBitfieldInsertInMask(Node))
1341       return;
1342 
1343     if (tryShrinkShlLogicImm(Node))
1344       return;
1345 
1346     break;
1347   }
1348   case ISD::XOR:
1349     if (tryShrinkShlLogicImm(Node))
1350       return;
1351 
1352     break;
1353   case ISD::AND: {
1354     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1355     if (!N1C)
1356       break;
1357 
1358     SDValue N0 = Node->getOperand(0);
1359 
1360     bool LeftShift = N0.getOpcode() == ISD::SHL;
1361     if (LeftShift || N0.getOpcode() == ISD::SRL) {
1362       auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1363       if (!C)
1364         break;
1365       unsigned C2 = C->getZExtValue();
1366       unsigned XLen = Subtarget->getXLen();
1367       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1368 
1369       // Keep track of whether this is a c.andi. If we can't use c.andi, the
1370       // shift pair might offer more compression opportunities.
1371       // TODO: We could check for C extension here, but we don't have many lit
1372       // tests with the C extension enabled so not checking gets better
1373       // coverage.
1374       // TODO: What if ANDI faster than shift?
1375       bool IsCANDI = isInt<6>(N1C->getSExtValue());
1376 
1377       uint64_t C1 = N1C->getZExtValue();
1378 
1379       // Clear irrelevant bits in the mask.
1380       if (LeftShift)
1381         C1 &= maskTrailingZeros<uint64_t>(C2);
1382       else
1383         C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1384 
1385       // Some transforms should only be done if the shift has a single use or
1386       // the AND would become (srli (slli X, 32), 32)
1387       bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1388 
1389       SDValue X = N0.getOperand(0);
1390 
1391       // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1392       // with c3 leading zeros.
1393       if (!LeftShift && isMask_64(C1)) {
1394         unsigned Leading = XLen - llvm::bit_width(C1);
1395         if (C2 < Leading) {
1396           // If the number of leading zeros is C2+32 this can be SRLIW.
1397           if (C2 + 32 == Leading) {
1398             SDNode *SRLIW = CurDAG->getMachineNode(
1399                 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1400             ReplaceNode(Node, SRLIW);
1401             return;
1402           }
1403 
1404           // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1405           // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1406           //
1407           // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1408           // legalized and goes through DAG combine.
1409           if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1410               X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1411               cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1412             SDNode *SRAIW =
1413                 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1414                                        CurDAG->getTargetConstant(31, DL, VT));
1415             SDNode *SRLIW = CurDAG->getMachineNode(
1416                 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1417                 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1418             ReplaceNode(Node, SRLIW);
1419             return;
1420           }
1421 
1422           // Try to use an unsigned bitfield extract (e.g., th.extu) if
1423           // available.
1424           // Transform (and (srl x, C2), C1)
1425           //        -> (<bfextract> x, msb, lsb)
1426           //
1427           // Make sure to keep this below the SRLIW cases, as we always want to
1428           // prefer the more common instruction.
1429           const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1430           const unsigned Lsb = C2;
1431           if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1432             return;
1433 
1434           // (srli (slli x, c3-c2), c3).
1435           // Skip if we could use (zext.w (sraiw X, C2)).
1436           bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1437                       X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1438                       cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1439           // Also Skip if we can use bexti or th.tst.
1440           Skip |= HasBitTest && Leading == XLen - 1;
1441           if (OneUseOrZExtW && !Skip) {
1442             SDNode *SLLI = CurDAG->getMachineNode(
1443                 RISCV::SLLI, DL, VT, X,
1444                 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1445             SDNode *SRLI = CurDAG->getMachineNode(
1446                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1447                 CurDAG->getTargetConstant(Leading, DL, VT));
1448             ReplaceNode(Node, SRLI);
1449             return;
1450           }
1451         }
1452       }
1453 
1454       // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1455       // shifted by c2 bits with c3 leading zeros.
1456       if (LeftShift && isShiftedMask_64(C1)) {
1457         unsigned Leading = XLen - llvm::bit_width(C1);
1458 
1459         if (C2 + Leading < XLen &&
1460             C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1461           // Use slli.uw when possible.
1462           if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1463             SDNode *SLLI_UW =
1464                 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1465                                        CurDAG->getTargetConstant(C2, DL, VT));
1466             ReplaceNode(Node, SLLI_UW);
1467             return;
1468           }
1469 
1470           // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
1471           // available.
1472           // Transform (and (shl x, c2), c1)
1473           //        -> (<bfinsert> x, msb, lsb)
1474           // e.g.
1475           //     (and (shl x, 12), 0x00fff000)
1476           //     If XLen = 32 and C2 = 12, then
1477           //     Msb = 32 - 8 - 1 = 23 and Lsb = 12
1478           const unsigned Msb = XLen - Leading - 1;
1479           const unsigned Lsb = C2;
1480           if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
1481             return;
1482 
1483           // (srli (slli c2+c3), c3)
1484           if (OneUseOrZExtW && !IsCANDI) {
1485             SDNode *SLLI = CurDAG->getMachineNode(
1486                 RISCV::SLLI, DL, VT, X,
1487                 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1488             SDNode *SRLI = CurDAG->getMachineNode(
1489                 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1490                 CurDAG->getTargetConstant(Leading, DL, VT));
1491             ReplaceNode(Node, SRLI);
1492             return;
1493           }
1494         }
1495       }
1496 
1497       // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1498       // shifted mask with c2 leading zeros and c3 trailing zeros.
1499       if (!LeftShift && isShiftedMask_64(C1)) {
1500         unsigned Leading = XLen - llvm::bit_width(C1);
1501         unsigned Trailing = llvm::countr_zero(C1);
1502         if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1503             !IsCANDI) {
1504           unsigned SrliOpc = RISCV::SRLI;
1505           // If the input is zexti32 we should use SRLIW.
1506           if (X.getOpcode() == ISD::AND &&
1507               isa<ConstantSDNode>(X.getOperand(1)) &&
1508               X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1509             SrliOpc = RISCV::SRLIW;
1510             X = X.getOperand(0);
1511           }
1512           SDNode *SRLI = CurDAG->getMachineNode(
1513               SrliOpc, DL, VT, X,
1514               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1515           SDNode *SLLI = CurDAG->getMachineNode(
1516               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1517               CurDAG->getTargetConstant(Trailing, DL, VT));
1518           ReplaceNode(Node, SLLI);
1519           return;
1520         }
1521         // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1522         if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1523             OneUseOrZExtW && !IsCANDI) {
1524           SDNode *SRLIW = CurDAG->getMachineNode(
1525               RISCV::SRLIW, DL, VT, X,
1526               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1527           SDNode *SLLI = CurDAG->getMachineNode(
1528               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1529               CurDAG->getTargetConstant(Trailing, DL, VT));
1530           ReplaceNode(Node, SLLI);
1531           return;
1532         }
1533         // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1534         if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1535             OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1536           SDNode *SRLI = CurDAG->getMachineNode(
1537               RISCV::SRLI, DL, VT, X,
1538               CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1539           SDNode *SLLI_UW = CurDAG->getMachineNode(
1540               RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1541               CurDAG->getTargetConstant(Trailing, DL, VT));
1542           ReplaceNode(Node, SLLI_UW);
1543           return;
1544         }
1545       }
1546 
1547       // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1548       // shifted mask with no leading zeros and c3 trailing zeros.
1549       if (LeftShift && isShiftedMask_64(C1)) {
1550         unsigned Leading = XLen - llvm::bit_width(C1);
1551         unsigned Trailing = llvm::countr_zero(C1);
1552         if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1553           SDNode *SRLI = CurDAG->getMachineNode(
1554               RISCV::SRLI, DL, VT, X,
1555               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1556           SDNode *SLLI = CurDAG->getMachineNode(
1557               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1558               CurDAG->getTargetConstant(Trailing, DL, VT));
1559           ReplaceNode(Node, SLLI);
1560           return;
1561         }
1562         // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1563         if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1564           SDNode *SRLIW = CurDAG->getMachineNode(
1565               RISCV::SRLIW, DL, VT, X,
1566               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1567           SDNode *SLLI = CurDAG->getMachineNode(
1568               RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1569               CurDAG->getTargetConstant(Trailing, DL, VT));
1570           ReplaceNode(Node, SLLI);
1571           return;
1572         }
1573 
1574         // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1575         if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1576             Subtarget->hasStdExtZba()) {
1577           SDNode *SRLI = CurDAG->getMachineNode(
1578               RISCV::SRLI, DL, VT, X,
1579               CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1580           SDNode *SLLI_UW = CurDAG->getMachineNode(
1581               RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1582               CurDAG->getTargetConstant(Trailing, DL, VT));
1583           ReplaceNode(Node, SLLI_UW);
1584           return;
1585         }
1586       }
1587     }
1588 
1589     const uint64_t C1 = N1C->getZExtValue();
1590 
1591     if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
1592         N0.hasOneUse()) {
1593       unsigned C2 = N0.getConstantOperandVal(1);
1594       unsigned XLen = Subtarget->getXLen();
1595       assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1596 
1597       SDValue X = N0.getOperand(0);
1598 
1599       // Prefer SRAIW + ANDI when possible.
1600       bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
1601                   X.getOpcode() == ISD::SHL &&
1602                   isa<ConstantSDNode>(X.getOperand(1)) &&
1603                   X.getConstantOperandVal(1) == 32;
1604       // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
1605       // mask with c3 leading zeros and c2 is larger than c3.
1606       if (isMask_64(C1) && !Skip) {
1607         unsigned Leading = XLen - llvm::bit_width(C1);
1608         if (C2 > Leading) {
1609           SDNode *SRAI = CurDAG->getMachineNode(
1610               RISCV::SRAI, DL, VT, X,
1611               CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1612           SDNode *SRLI = CurDAG->getMachineNode(
1613               RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1614               CurDAG->getTargetConstant(Leading, DL, VT));
1615           ReplaceNode(Node, SRLI);
1616           return;
1617         }
1618       }
1619 
1620       // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
1621       // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
1622       // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
1623       if (isShiftedMask_64(C1) && !Skip) {
1624         unsigned Leading = XLen - llvm::bit_width(C1);
1625         unsigned Trailing = llvm::countr_zero(C1);
1626         if (C2 > Leading && Leading > 0 && Trailing > 0) {
1627           SDNode *SRAI = CurDAG->getMachineNode(
1628               RISCV::SRAI, DL, VT, N0.getOperand(0),
1629               CurDAG->getTargetConstant(C2 - Leading, DL, VT));
1630           SDNode *SRLI = CurDAG->getMachineNode(
1631               RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
1632               CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
1633           SDNode *SLLI = CurDAG->getMachineNode(
1634               RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1635               CurDAG->getTargetConstant(Trailing, DL, VT));
1636           ReplaceNode(Node, SLLI);
1637           return;
1638         }
1639       }
1640     }
1641 
1642     // If C1 masks off the upper bits only (but can't be formed as an
1643     // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1644     // available.
1645     // Transform (and x, C1)
1646     //        -> (<bfextract> x, msb, lsb)
1647     if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
1648       const unsigned Msb = llvm::bit_width(C1) - 1;
1649       if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1650         return;
1651     }
1652 
1653     if (tryShrinkShlLogicImm(Node))
1654       return;
1655 
1656     break;
1657   }
1658   case ISD::MUL: {
1659     // Special case for calculating (mul (and X, C2), C1) where the full product
1660     // fits in XLen bits. We can shift X left by the number of leading zeros in
1661     // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1662     // product has XLen trailing zeros, putting it in the output of MULHU. This
1663     // can avoid materializing a constant in a register for C2.
1664 
1665     // RHS should be a constant.
1666     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1667     if (!N1C || !N1C->hasOneUse())
1668       break;
1669 
1670     // LHS should be an AND with constant.
1671     SDValue N0 = Node->getOperand(0);
1672     if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1673       break;
1674 
1675     uint64_t C2 = N0.getConstantOperandVal(1);
1676 
1677     // Constant should be a mask.
1678     if (!isMask_64(C2))
1679       break;
1680 
1681     // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1682     // multiple users or the constant is a simm12. This prevents inserting a
1683     // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1684     // make it more costly to materialize. Otherwise, using a SLLI might allow
1685     // it to be compressed.
1686     bool IsANDIOrZExt =
1687         isInt<12>(C2) ||
1688         (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1689     // With XTHeadBb, we can use TH.EXTU.
1690     IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1691     if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1692       break;
1693     // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1694     // the constant is a simm32.
1695     bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1696     // With XTHeadBb, we can use TH.EXTU.
1697     IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1698     if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1699       break;
1700 
1701     // We need to shift left the AND input and C1 by a total of XLen bits.
1702 
1703     // How far left do we need to shift the AND input?
1704     unsigned XLen = Subtarget->getXLen();
1705     unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1706 
1707     // The constant gets shifted by the remaining amount unless that would
1708     // shift bits out.
1709     uint64_t C1 = N1C->getZExtValue();
1710     unsigned ConstantShift = XLen - LeadingZeros;
1711     if (ConstantShift > (XLen - llvm::bit_width(C1)))
1712       break;
1713 
1714     uint64_t ShiftedC1 = C1 << ConstantShift;
1715     // If this is RV32, we need to sign extend the constant.
1716     if (XLen == 32)
1717       ShiftedC1 = SignExtend64<32>(ShiftedC1);
1718 
1719     // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1720     SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1721     SDNode *SLLI =
1722         CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1723                                CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1724     SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1725                                            SDValue(SLLI, 0), SDValue(Imm, 0));
1726     ReplaceNode(Node, MULHU);
1727     return;
1728   }
1729   case ISD::LOAD: {
1730     if (tryIndexedLoad(Node))
1731       return;
1732 
1733     if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
1734       // We match post-incrementing load here
1735       LoadSDNode *Load = cast<LoadSDNode>(Node);
1736       if (Load->getAddressingMode() != ISD::POST_INC)
1737         break;
1738 
1739       SDValue Chain = Node->getOperand(0);
1740       SDValue Base = Node->getOperand(1);
1741       SDValue Offset = Node->getOperand(2);
1742 
1743       bool Simm12 = false;
1744       bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1745 
1746       if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1747         int ConstantVal = ConstantOffset->getSExtValue();
1748         Simm12 = isInt<12>(ConstantVal);
1749         if (Simm12)
1750           Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1751                                              Offset.getValueType());
1752       }
1753 
1754       unsigned Opcode = 0;
1755       switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1756       case MVT::i8:
1757         if (Simm12 && SignExtend)
1758           Opcode = RISCV::CV_LB_ri_inc;
1759         else if (Simm12 && !SignExtend)
1760           Opcode = RISCV::CV_LBU_ri_inc;
1761         else if (!Simm12 && SignExtend)
1762           Opcode = RISCV::CV_LB_rr_inc;
1763         else
1764           Opcode = RISCV::CV_LBU_rr_inc;
1765         break;
1766       case MVT::i16:
1767         if (Simm12 && SignExtend)
1768           Opcode = RISCV::CV_LH_ri_inc;
1769         else if (Simm12 && !SignExtend)
1770           Opcode = RISCV::CV_LHU_ri_inc;
1771         else if (!Simm12 && SignExtend)
1772           Opcode = RISCV::CV_LH_rr_inc;
1773         else
1774           Opcode = RISCV::CV_LHU_rr_inc;
1775         break;
1776       case MVT::i32:
1777         if (Simm12)
1778           Opcode = RISCV::CV_LW_ri_inc;
1779         else
1780           Opcode = RISCV::CV_LW_rr_inc;
1781         break;
1782       default:
1783         break;
1784       }
1785       if (!Opcode)
1786         break;
1787 
1788       ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1789                                                Chain.getSimpleValueType(), Base,
1790                                                Offset, Chain));
1791       return;
1792     }
1793     break;
1794   }
1795   case RISCVISD::LD_RV32: {
1796     assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");
1797 
1798     SDValue Base, Offset;
1799     SDValue Chain = Node->getOperand(0);
1800     SDValue Addr = Node->getOperand(1);
1801     SelectAddrRegImm(Addr, Base, Offset);
1802 
1803     SDValue Ops[] = {Base, Offset, Chain};
1804     MachineSDNode *New = CurDAG->getMachineNode(
1805         RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
1806     SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
1807                                                 MVT::i32, SDValue(New, 0));
1808     SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
1809                                                 MVT::i32, SDValue(New, 0));
1810     CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1811     ReplaceUses(SDValue(Node, 0), Lo);
1812     ReplaceUses(SDValue(Node, 1), Hi);
1813     ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
1814     CurDAG->RemoveDeadNode(Node);
1815     return;
1816   }
1817   case RISCVISD::SD_RV32: {
1818     SDValue Base, Offset;
1819     SDValue Chain = Node->getOperand(0);
1820     SDValue Addr = Node->getOperand(3);
1821     SelectAddrRegImm(Addr, Base, Offset);
1822 
1823     SDValue Lo = Node->getOperand(1);
1824     SDValue Hi = Node->getOperand(2);
1825 
1826     SDValue RegPair;
1827     // Peephole to use X0_Pair for storing zero.
1828     if (isNullConstant(Lo) && isNullConstant(Hi)) {
1829       RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
1830     } else {
1831       SDValue Ops[] = {
1832           CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
1833           CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
1834           CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1835 
1836       RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
1837                                                MVT::Untyped, Ops),
1838                         0);
1839     }
1840 
1841     MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
1842                                                 {RegPair, Base, Offset, Chain});
1843     CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
1844     ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
1845     CurDAG->RemoveDeadNode(Node);
1846     return;
1847   }
1848   case ISD::INTRINSIC_WO_CHAIN: {
1849     unsigned IntNo = Node->getConstantOperandVal(0);
1850     switch (IntNo) {
1851       // By default we do not custom select any intrinsic.
1852     default:
1853       break;
1854     case Intrinsic::riscv_vmsgeu:
1855     case Intrinsic::riscv_vmsge: {
1856       SDValue Src1 = Node->getOperand(1);
1857       SDValue Src2 = Node->getOperand(2);
1858       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1859       bool IsCmpConstant = false;
1860       bool IsCmpMinimum = false;
1861       // Only custom select scalar second operand.
1862       if (Src2.getValueType() != XLenVT)
1863         break;
1864       // Small constants are handled with patterns.
1865       int64_t CVal = 0;
1866       MVT Src1VT = Src1.getSimpleValueType();
1867       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1868         IsCmpConstant = true;
1869         CVal = C->getSExtValue();
1870         if (CVal >= -15 && CVal <= 16) {
1871           if (!IsUnsigned || CVal != 0)
1872             break;
1873           IsCmpMinimum = true;
1874         } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1875                                               Src1VT.getScalarSizeInBits())
1876                                               .getSExtValue()) {
1877           IsCmpMinimum = true;
1878         }
1879       }
1880       unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
1881       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1882       default:
1883         llvm_unreachable("Unexpected LMUL!");
1884 #define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
1885   case RISCVVType::lmulenum:                                                   \
1886     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1887                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1888     VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix                 \
1889                              : RISCV::PseudoVMSGT_VX_##suffix;                 \
1890     break;
1891         CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1892         CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1893         CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1894         CASE_VMSLT_OPCODES(LMUL_1, M1)
1895         CASE_VMSLT_OPCODES(LMUL_2, M2)
1896         CASE_VMSLT_OPCODES(LMUL_4, M4)
1897         CASE_VMSLT_OPCODES(LMUL_8, M8)
1898 #undef CASE_VMSLT_OPCODES
1899       }
1900       // Mask operations use the LMUL from the mask type.
1901       switch (RISCVTargetLowering::getLMUL(VT)) {
1902       default:
1903         llvm_unreachable("Unexpected LMUL!");
1904 #define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)                            \
1905   case RISCVVType::lmulenum:                                                   \
1906     VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
1907     VMSetOpcode = RISCV::PseudoVMSET_M_##suffix;                               \
1908     break;
1909         CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
1910         CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
1911         CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
1912         CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
1913         CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
1914         CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
1915         CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
1916 #undef CASE_VMNAND_VMSET_OPCODES
1917       }
1918       SDValue SEW = CurDAG->getTargetConstant(
1919           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1920       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1921       SDValue VL;
1922       selectVLOp(Node->getOperand(3), VL);
1923 
1924       // If vmsge(u) with minimum value, expand it to vmset.
1925       if (IsCmpMinimum) {
1926         ReplaceNode(Node,
1927                     CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
1928         return;
1929       }
1930 
1931       if (IsCmpConstant) {
1932         SDValue Imm =
1933             selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
1934 
1935         ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
1936                                                  {Src1, Imm, VL, SEW}));
1937         return;
1938       }
1939 
1940       // Expand to
1941       // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
1942       SDValue Cmp = SDValue(
1943           CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1944           0);
1945       ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1946                                                {Cmp, Cmp, VL, MaskSEW}));
1947       return;
1948     }
1949     case Intrinsic::riscv_vmsgeu_mask:
1950     case Intrinsic::riscv_vmsge_mask: {
1951       SDValue Src1 = Node->getOperand(2);
1952       SDValue Src2 = Node->getOperand(3);
1953       bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1954       bool IsCmpConstant = false;
1955       bool IsCmpMinimum = false;
1956       // Only custom select scalar second operand.
1957       if (Src2.getValueType() != XLenVT)
1958         break;
1959       // Small constants are handled with patterns.
1960       MVT Src1VT = Src1.getSimpleValueType();
1961       int64_t CVal = 0;
1962       if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1963         IsCmpConstant = true;
1964         CVal = C->getSExtValue();
1965         if (CVal >= -15 && CVal <= 16) {
1966           if (!IsUnsigned || CVal != 0)
1967             break;
1968           IsCmpMinimum = true;
1969         } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
1970                                               Src1VT.getScalarSizeInBits())
1971                                               .getSExtValue()) {
1972           IsCmpMinimum = true;
1973         }
1974       }
1975       unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1976           VMOROpcode, VMSGTMaskOpcode;
1977       switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1978       default:
1979         llvm_unreachable("Unexpected LMUL!");
1980 #define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
1981   case RISCVVType::lmulenum:                                                   \
1982     VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
1983                              : RISCV::PseudoVMSLT_VX_##suffix;                 \
1984     VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
1985                                  : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
1986     VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK      \
1987                                  : RISCV::PseudoVMSGT_VX_##suffix##_MASK;      \
1988     break;
1989         CASE_VMSLT_OPCODES(LMUL_F8, MF8)
1990         CASE_VMSLT_OPCODES(LMUL_F4, MF4)
1991         CASE_VMSLT_OPCODES(LMUL_F2, MF2)
1992         CASE_VMSLT_OPCODES(LMUL_1, M1)
1993         CASE_VMSLT_OPCODES(LMUL_2, M2)
1994         CASE_VMSLT_OPCODES(LMUL_4, M4)
1995         CASE_VMSLT_OPCODES(LMUL_8, M8)
1996 #undef CASE_VMSLT_OPCODES
1997       }
1998       // Mask operations use the LMUL from the mask type.
1999       switch (RISCVTargetLowering::getLMUL(VT)) {
2000       default:
2001         llvm_unreachable("Unexpected LMUL!");
2002 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
2003   case RISCVVType::lmulenum:                                                   \
2004     VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
2005     VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
2006     VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
2007     break;
2008         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
2009         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
2010         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
2011         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
2012         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
2013         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
2014         CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
2015 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
2016       }
2017       SDValue SEW = CurDAG->getTargetConstant(
2018           Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
2019       SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
2020       SDValue VL;
2021       selectVLOp(Node->getOperand(5), VL);
2022       SDValue MaskedOff = Node->getOperand(1);
2023       SDValue Mask = Node->getOperand(4);
2024 
2025       // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
2026       if (IsCmpMinimum) {
2027         // We don't need vmor if the MaskedOff and the Mask are the same
2028         // value.
2029         if (Mask == MaskedOff) {
2030           ReplaceUses(Node, Mask.getNode());
2031           return;
2032         }
2033         ReplaceNode(Node,
2034                     CurDAG->getMachineNode(VMOROpcode, DL, VT,
2035                                            {Mask, MaskedOff, VL, MaskSEW}));
2036         return;
2037       }
2038 
2039       // If the MaskedOff value and the Mask are the same value use
2040       // vmslt{u}.vx vt, va, x;  vmandn.mm vd, vd, vt
2041       // This avoids needing to copy v0 to vd before starting the next sequence.
2042       if (Mask == MaskedOff) {
2043         SDValue Cmp = SDValue(
2044             CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
2045             0);
2046         ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
2047                                                  {Mask, Cmp, VL, MaskSEW}));
2048         return;
2049       }
2050 
2051       SDValue PolicyOp =
2052           CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);
2053 
2054       if (IsCmpConstant) {
2055         SDValue Imm =
2056             selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);
2057 
2058         ReplaceNode(Node, CurDAG->getMachineNode(
2059                               VMSGTMaskOpcode, DL, VT,
2060                               {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
2061         return;
2062       }
2063 
2064       // Otherwise use
2065       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
2066       // The result is mask undisturbed.
2067       // We use the same instructions to emulate mask agnostic behavior, because
2068       // the agnostic result can be either undisturbed or all 1.
2069       SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
2070                                                    {MaskedOff, Src1, Src2, Mask,
2071                                                     VL, SEW, PolicyOp}),
2072                             0);
2073       // vmxor.mm vd, vd, v0 is used to update active value.
2074       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
2075                                                {Cmp, Mask, VL, MaskSEW}));
2076       return;
2077     }
2078     case Intrinsic::riscv_vsetvli:
2079     case Intrinsic::riscv_vsetvlimax:
2080       return selectVSETVLI(Node);
2081     }
2082     break;
2083   }
2084   case ISD::INTRINSIC_W_CHAIN: {
2085     unsigned IntNo = Node->getConstantOperandVal(1);
2086     switch (IntNo) {
2087       // By default we do not custom select any intrinsic.
2088     default:
2089       break;
2090     case Intrinsic::riscv_vlseg2:
2091     case Intrinsic::riscv_vlseg3:
2092     case Intrinsic::riscv_vlseg4:
2093     case Intrinsic::riscv_vlseg5:
2094     case Intrinsic::riscv_vlseg6:
2095     case Intrinsic::riscv_vlseg7:
2096     case Intrinsic::riscv_vlseg8: {
2097       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2098                   /*IsStrided*/ false);
2099       return;
2100     }
2101     case Intrinsic::riscv_vlseg2_mask:
2102     case Intrinsic::riscv_vlseg3_mask:
2103     case Intrinsic::riscv_vlseg4_mask:
2104     case Intrinsic::riscv_vlseg5_mask:
2105     case Intrinsic::riscv_vlseg6_mask:
2106     case Intrinsic::riscv_vlseg7_mask:
2107     case Intrinsic::riscv_vlseg8_mask: {
2108       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2109                   /*IsStrided*/ false);
2110       return;
2111     }
2112     case Intrinsic::riscv_vlsseg2:
2113     case Intrinsic::riscv_vlsseg3:
2114     case Intrinsic::riscv_vlsseg4:
2115     case Intrinsic::riscv_vlsseg5:
2116     case Intrinsic::riscv_vlsseg6:
2117     case Intrinsic::riscv_vlsseg7:
2118     case Intrinsic::riscv_vlsseg8: {
2119       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2120                   /*IsStrided*/ true);
2121       return;
2122     }
2123     case Intrinsic::riscv_vlsseg2_mask:
2124     case Intrinsic::riscv_vlsseg3_mask:
2125     case Intrinsic::riscv_vlsseg4_mask:
2126     case Intrinsic::riscv_vlsseg5_mask:
2127     case Intrinsic::riscv_vlsseg6_mask:
2128     case Intrinsic::riscv_vlsseg7_mask:
2129     case Intrinsic::riscv_vlsseg8_mask: {
2130       selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2131                   /*IsStrided*/ true);
2132       return;
2133     }
2134     case Intrinsic::riscv_vloxseg2:
2135     case Intrinsic::riscv_vloxseg3:
2136     case Intrinsic::riscv_vloxseg4:
2137     case Intrinsic::riscv_vloxseg5:
2138     case Intrinsic::riscv_vloxseg6:
2139     case Intrinsic::riscv_vloxseg7:
2140     case Intrinsic::riscv_vloxseg8:
2141       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2142                    /*IsOrdered*/ true);
2143       return;
2144     case Intrinsic::riscv_vluxseg2:
2145     case Intrinsic::riscv_vluxseg3:
2146     case Intrinsic::riscv_vluxseg4:
2147     case Intrinsic::riscv_vluxseg5:
2148     case Intrinsic::riscv_vluxseg6:
2149     case Intrinsic::riscv_vluxseg7:
2150     case Intrinsic::riscv_vluxseg8:
2151       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2152                    /*IsOrdered*/ false);
2153       return;
2154     case Intrinsic::riscv_vloxseg2_mask:
2155     case Intrinsic::riscv_vloxseg3_mask:
2156     case Intrinsic::riscv_vloxseg4_mask:
2157     case Intrinsic::riscv_vloxseg5_mask:
2158     case Intrinsic::riscv_vloxseg6_mask:
2159     case Intrinsic::riscv_vloxseg7_mask:
2160     case Intrinsic::riscv_vloxseg8_mask:
2161       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2162                    /*IsOrdered*/ true);
2163       return;
2164     case Intrinsic::riscv_vluxseg2_mask:
2165     case Intrinsic::riscv_vluxseg3_mask:
2166     case Intrinsic::riscv_vluxseg4_mask:
2167     case Intrinsic::riscv_vluxseg5_mask:
2168     case Intrinsic::riscv_vluxseg6_mask:
2169     case Intrinsic::riscv_vluxseg7_mask:
2170     case Intrinsic::riscv_vluxseg8_mask:
2171       selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2172                    /*IsOrdered*/ false);
2173       return;
2174     case Intrinsic::riscv_vlseg8ff:
2175     case Intrinsic::riscv_vlseg7ff:
2176     case Intrinsic::riscv_vlseg6ff:
2177     case Intrinsic::riscv_vlseg5ff:
2178     case Intrinsic::riscv_vlseg4ff:
2179     case Intrinsic::riscv_vlseg3ff:
2180     case Intrinsic::riscv_vlseg2ff: {
2181       selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
2182       return;
2183     }
2184     case Intrinsic::riscv_vlseg8ff_mask:
2185     case Intrinsic::riscv_vlseg7ff_mask:
2186     case Intrinsic::riscv_vlseg6ff_mask:
2187     case Intrinsic::riscv_vlseg5ff_mask:
2188     case Intrinsic::riscv_vlseg4ff_mask:
2189     case Intrinsic::riscv_vlseg3ff_mask:
2190     case Intrinsic::riscv_vlseg2ff_mask: {
2191       selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
2192       return;
2193     }
2194     case Intrinsic::riscv_vloxei:
2195     case Intrinsic::riscv_vloxei_mask:
2196     case Intrinsic::riscv_vluxei:
2197     case Intrinsic::riscv_vluxei_mask: {
2198       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
2199                       IntNo == Intrinsic::riscv_vluxei_mask;
2200       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
2201                        IntNo == Intrinsic::riscv_vloxei_mask;
2202 
2203       MVT VT = Node->getSimpleValueType(0);
2204       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2205 
2206       unsigned CurOp = 2;
2207       SmallVector<SDValue, 8> Operands;
2208       Operands.push_back(Node->getOperand(CurOp++));
2209 
2210       MVT IndexVT;
2211       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2212                                  /*IsStridedOrIndexed*/ true, Operands,
2213                                  /*IsLoad=*/true, &IndexVT);
2214 
2215       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2216              "Element count mismatch");
2217 
2218       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2219       RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2220       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2221       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2222         report_fatal_error("The V extension does not support EEW=64 for index "
2223                            "values when XLEN=32");
2224       }
2225       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
2226           IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
2227           static_cast<unsigned>(IndexLMUL));
2228       MachineSDNode *Load =
2229           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2230 
2231       CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2232 
2233       ReplaceNode(Node, Load);
2234       return;
2235     }
2236     case Intrinsic::riscv_vlm:
2237     case Intrinsic::riscv_vle:
2238     case Intrinsic::riscv_vle_mask:
2239     case Intrinsic::riscv_vlse:
2240     case Intrinsic::riscv_vlse_mask: {
2241       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
2242                       IntNo == Intrinsic::riscv_vlse_mask;
2243       bool IsStrided =
2244           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
2245 
2246       MVT VT = Node->getSimpleValueType(0);
2247       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2248 
2249       // The riscv_vlm intrinsic is always tail agnostic and has no passthru
2250       // operand at the IR level.  In pseudos, they have both policy and
2251       // passthru operand. The passthru operand is needed to track the
2252       // "tail undefined" state, and the policy is there just
2253       // for consistency - it will always be "don't care" for the
2254       // unmasked form.
2255       bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
2256       unsigned CurOp = 2;
2257       SmallVector<SDValue, 8> Operands;
2258       if (HasPassthruOperand)
2259         Operands.push_back(Node->getOperand(CurOp++));
2260       else {
2261         // We eagerly lower to implicit_def (instead of undef), as we
2262         // otherwise fail to select nodes such as: nxv1i1 = undef
2263         SDNode *Passthru =
2264           CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
2265         Operands.push_back(SDValue(Passthru, 0));
2266       }
2267       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2268                                  Operands, /*IsLoad=*/true);
2269 
2270       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2271       const RISCV::VLEPseudo *P =
2272           RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
2273                               static_cast<unsigned>(LMUL));
2274       MachineSDNode *Load =
2275           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2276 
2277       CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2278 
2279       ReplaceNode(Node, Load);
2280       return;
2281     }
2282     case Intrinsic::riscv_vleff:
2283     case Intrinsic::riscv_vleff_mask: {
2284       bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
2285 
2286       MVT VT = Node->getSimpleValueType(0);
2287       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2288 
2289       unsigned CurOp = 2;
2290       SmallVector<SDValue, 7> Operands;
2291       Operands.push_back(Node->getOperand(CurOp++));
2292       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2293                                  /*IsStridedOrIndexed*/ false, Operands,
2294                                  /*IsLoad=*/true);
2295 
2296       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2297       const RISCV::VLEPseudo *P =
2298           RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
2299                               Log2SEW, static_cast<unsigned>(LMUL));
2300       MachineSDNode *Load = CurDAG->getMachineNode(
2301           P->Pseudo, DL, Node->getVTList(), Operands);
2302       CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});
2303 
2304       ReplaceNode(Node, Load);
2305       return;
2306     }
2307     case Intrinsic::riscv_nds_vln:
2308     case Intrinsic::riscv_nds_vln_mask:
2309     case Intrinsic::riscv_nds_vlnu:
2310     case Intrinsic::riscv_nds_vlnu_mask: {
2311       bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
2312                       IntNo == Intrinsic::riscv_nds_vlnu_mask;
2313       bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
2314                         IntNo == Intrinsic::riscv_nds_vlnu_mask;
2315 
2316       MVT VT = Node->getSimpleValueType(0);
2317       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2318       unsigned CurOp = 2;
2319       SmallVector<SDValue, 8> Operands;
2320 
2321       Operands.push_back(Node->getOperand(CurOp++));
2322       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2323                                  /*IsStridedOrIndexed=*/false, Operands,
2324                                  /*IsLoad=*/true);
2325 
2326       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2327       const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
2328           IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
2329       MachineSDNode *Load =
2330           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2331 
2332       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2333         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
2334 
2335       ReplaceNode(Node, Load);
2336       return;
2337     }
2338     }
2339     break;
2340   }
2341   case ISD::INTRINSIC_VOID: {
2342     unsigned IntNo = Node->getConstantOperandVal(1);
2343     switch (IntNo) {
2344     case Intrinsic::riscv_vsseg2:
2345     case Intrinsic::riscv_vsseg3:
2346     case Intrinsic::riscv_vsseg4:
2347     case Intrinsic::riscv_vsseg5:
2348     case Intrinsic::riscv_vsseg6:
2349     case Intrinsic::riscv_vsseg7:
2350     case Intrinsic::riscv_vsseg8: {
2351       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2352                   /*IsStrided*/ false);
2353       return;
2354     }
2355     case Intrinsic::riscv_vsseg2_mask:
2356     case Intrinsic::riscv_vsseg3_mask:
2357     case Intrinsic::riscv_vsseg4_mask:
2358     case Intrinsic::riscv_vsseg5_mask:
2359     case Intrinsic::riscv_vsseg6_mask:
2360     case Intrinsic::riscv_vsseg7_mask:
2361     case Intrinsic::riscv_vsseg8_mask: {
2362       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2363                   /*IsStrided*/ false);
2364       return;
2365     }
2366     case Intrinsic::riscv_vssseg2:
2367     case Intrinsic::riscv_vssseg3:
2368     case Intrinsic::riscv_vssseg4:
2369     case Intrinsic::riscv_vssseg5:
2370     case Intrinsic::riscv_vssseg6:
2371     case Intrinsic::riscv_vssseg7:
2372     case Intrinsic::riscv_vssseg8: {
2373       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2374                   /*IsStrided*/ true);
2375       return;
2376     }
2377     case Intrinsic::riscv_vssseg2_mask:
2378     case Intrinsic::riscv_vssseg3_mask:
2379     case Intrinsic::riscv_vssseg4_mask:
2380     case Intrinsic::riscv_vssseg5_mask:
2381     case Intrinsic::riscv_vssseg6_mask:
2382     case Intrinsic::riscv_vssseg7_mask:
2383     case Intrinsic::riscv_vssseg8_mask: {
2384       selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2385                   /*IsStrided*/ true);
2386       return;
2387     }
2388     case Intrinsic::riscv_vsoxseg2:
2389     case Intrinsic::riscv_vsoxseg3:
2390     case Intrinsic::riscv_vsoxseg4:
2391     case Intrinsic::riscv_vsoxseg5:
2392     case Intrinsic::riscv_vsoxseg6:
2393     case Intrinsic::riscv_vsoxseg7:
2394     case Intrinsic::riscv_vsoxseg8:
2395       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2396                    /*IsOrdered*/ true);
2397       return;
2398     case Intrinsic::riscv_vsuxseg2:
2399     case Intrinsic::riscv_vsuxseg3:
2400     case Intrinsic::riscv_vsuxseg4:
2401     case Intrinsic::riscv_vsuxseg5:
2402     case Intrinsic::riscv_vsuxseg6:
2403     case Intrinsic::riscv_vsuxseg7:
2404     case Intrinsic::riscv_vsuxseg8:
2405       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
2406                    /*IsOrdered*/ false);
2407       return;
2408     case Intrinsic::riscv_vsoxseg2_mask:
2409     case Intrinsic::riscv_vsoxseg3_mask:
2410     case Intrinsic::riscv_vsoxseg4_mask:
2411     case Intrinsic::riscv_vsoxseg5_mask:
2412     case Intrinsic::riscv_vsoxseg6_mask:
2413     case Intrinsic::riscv_vsoxseg7_mask:
2414     case Intrinsic::riscv_vsoxseg8_mask:
2415       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2416                    /*IsOrdered*/ true);
2417       return;
2418     case Intrinsic::riscv_vsuxseg2_mask:
2419     case Intrinsic::riscv_vsuxseg3_mask:
2420     case Intrinsic::riscv_vsuxseg4_mask:
2421     case Intrinsic::riscv_vsuxseg5_mask:
2422     case Intrinsic::riscv_vsuxseg6_mask:
2423     case Intrinsic::riscv_vsuxseg7_mask:
2424     case Intrinsic::riscv_vsuxseg8_mask:
2425       selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
2426                    /*IsOrdered*/ false);
2427       return;
2428     case Intrinsic::riscv_vsoxei:
2429     case Intrinsic::riscv_vsoxei_mask:
2430     case Intrinsic::riscv_vsuxei:
2431     case Intrinsic::riscv_vsuxei_mask: {
2432       bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2433                       IntNo == Intrinsic::riscv_vsuxei_mask;
2434       bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2435                        IntNo == Intrinsic::riscv_vsoxei_mask;
2436 
2437       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2438       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2439 
2440       unsigned CurOp = 2;
2441       SmallVector<SDValue, 8> Operands;
2442       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2443 
2444       MVT IndexVT;
2445       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2446                                  /*IsStridedOrIndexed*/ true, Operands,
2447                                  /*IsLoad=*/false, &IndexVT);
2448 
2449       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2450              "Element count mismatch");
2451 
2452       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2453       RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2454       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2455       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2456         report_fatal_error("The V extension does not support EEW=64 for index "
2457                            "values when XLEN=32");
2458       }
2459       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2460           IsMasked, IsOrdered, IndexLog2EEW,
2461           static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2462       MachineSDNode *Store =
2463           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2464 
2465       CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2466 
2467       ReplaceNode(Node, Store);
2468       return;
2469     }
2470     case Intrinsic::riscv_vsm:
2471     case Intrinsic::riscv_vse:
2472     case Intrinsic::riscv_vse_mask:
2473     case Intrinsic::riscv_vsse:
2474     case Intrinsic::riscv_vsse_mask: {
2475       bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2476                       IntNo == Intrinsic::riscv_vsse_mask;
2477       bool IsStrided =
2478           IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2479 
2480       MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2481       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2482 
2483       unsigned CurOp = 2;
2484       SmallVector<SDValue, 8> Operands;
2485       Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2486 
2487       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2488                                  Operands);
2489 
2490       RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2491       const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2492           IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2493       MachineSDNode *Store =
2494           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2495       CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});
2496 
2497       ReplaceNode(Node, Store);
2498       return;
2499     }
2500     case Intrinsic::riscv_sf_vc_x_se:
2501     case Intrinsic::riscv_sf_vc_i_se:
2502       selectSF_VC_X_SE(Node);
2503       return;
2504     }
2505     break;
2506   }
2507   case ISD::BITCAST: {
2508     MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2509     // Just drop bitcasts between vectors if both are fixed or both are
2510     // scalable.
2511     if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2512         (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2513       ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2514       CurDAG->RemoveDeadNode(Node);
2515       return;
2516     }
2517     break;
2518   }
2519   case ISD::INSERT_SUBVECTOR:
2520   case RISCVISD::TUPLE_INSERT: {
2521     SDValue V = Node->getOperand(0);
2522     SDValue SubV = Node->getOperand(1);
2523     SDLoc DL(SubV);
2524     auto Idx = Node->getConstantOperandVal(2);
2525     MVT SubVecVT = SubV.getSimpleValueType();
2526 
2527     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2528     MVT SubVecContainerVT = SubVecVT;
2529     // Establish the correct scalable-vector types for any fixed-length type.
2530     if (SubVecVT.isFixedLengthVector()) {
2531       SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2532       TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2533       [[maybe_unused]] bool ExactlyVecRegSized =
2534           Subtarget->expandVScale(SubVecVT.getSizeInBits())
2535               .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2536       assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2537                                .getKnownMinValue()));
2538       assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2539     }
2540     MVT ContainerVT = VT;
2541     if (VT.isFixedLengthVector())
2542       ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2543 
2544     const auto *TRI = Subtarget->getRegisterInfo();
2545     unsigned SubRegIdx;
2546     std::tie(SubRegIdx, Idx) =
2547         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2548             ContainerVT, SubVecContainerVT, Idx, TRI);
2549 
2550     // If the Idx hasn't been completely eliminated then this is a subvector
2551     // insert which doesn't naturally align to a vector register. These must
2552     // be handled using instructions to manipulate the vector registers.
2553     if (Idx != 0)
2554       break;
2555 
2556     RISCVVType::VLMUL SubVecLMUL =
2557         RISCVTargetLowering::getLMUL(SubVecContainerVT);
2558     [[maybe_unused]] bool IsSubVecPartReg =
2559         SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
2560         SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
2561         SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
2562     assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
2563             V.isUndef()) &&
2564            "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2565            "the subvector is smaller than a full-sized register");
2566 
2567     // If we haven't set a SubRegIdx, then we must be going between
2568     // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2569     if (SubRegIdx == RISCV::NoSubRegister) {
2570       unsigned InRegClassID =
2571           RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2572       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2573                  InRegClassID &&
2574              "Unexpected subvector extraction");
2575       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2576       SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2577                                                DL, VT, SubV, RC);
2578       ReplaceNode(Node, NewNode);
2579       return;
2580     }
2581 
2582     SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2583     ReplaceNode(Node, Insert.getNode());
2584     return;
2585   }
2586   case ISD::EXTRACT_SUBVECTOR:
2587   case RISCVISD::TUPLE_EXTRACT: {
2588     SDValue V = Node->getOperand(0);
2589     auto Idx = Node->getConstantOperandVal(1);
2590     MVT InVT = V.getSimpleValueType();
2591     SDLoc DL(V);
2592 
2593     const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2594     MVT SubVecContainerVT = VT;
2595     // Establish the correct scalable-vector types for any fixed-length type.
2596     if (VT.isFixedLengthVector()) {
2597       assert(Idx == 0);
2598       SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2599     }
2600     if (InVT.isFixedLengthVector())
2601       InVT = TLI.getContainerForFixedLengthVector(InVT);
2602 
2603     const auto *TRI = Subtarget->getRegisterInfo();
2604     unsigned SubRegIdx;
2605     std::tie(SubRegIdx, Idx) =
2606         RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2607             InVT, SubVecContainerVT, Idx, TRI);
2608 
2609     // If the Idx hasn't been completely eliminated then this is a subvector
2610     // extract which doesn't naturally align to a vector register. These must
2611     // be handled using instructions to manipulate the vector registers.
2612     if (Idx != 0)
2613       break;
2614 
2615     // If we haven't set a SubRegIdx, then we must be going between
2616     // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2617     if (SubRegIdx == RISCV::NoSubRegister) {
2618       unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2619       assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2620                  InRegClassID &&
2621              "Unexpected subvector extraction");
2622       SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2623       SDNode *NewNode =
2624           CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2625       ReplaceNode(Node, NewNode);
2626       return;
2627     }
2628 
2629     SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2630     ReplaceNode(Node, Extract.getNode());
2631     return;
2632   }
2633   case RISCVISD::VMV_S_X_VL:
2634   case RISCVISD::VFMV_S_F_VL:
2635   case RISCVISD::VMV_V_X_VL:
2636   case RISCVISD::VFMV_V_F_VL: {
2637     // Try to match splat of a scalar load to a strided load with stride of x0.
2638     bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2639                         Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2640     if (!Node->getOperand(0).isUndef())
2641       break;
2642     SDValue Src = Node->getOperand(1);
2643     auto *Ld = dyn_cast<LoadSDNode>(Src);
2644     // Can't fold load update node because the second
2645     // output is used so that load update node can't be removed.
2646     if (!Ld || Ld->isIndexed())
2647       break;
2648     EVT MemVT = Ld->getMemoryVT();
2649     // The memory VT should be the same size as the element type.
2650     if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2651       break;
2652     if (!IsProfitableToFold(Src, Node, Node) ||
2653         !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2654       break;
2655 
2656     SDValue VL;
2657     if (IsScalarMove) {
2658       // We could deal with more VL if we update the VSETVLI insert pass to
2659       // avoid introducing more VSETVLI.
2660       if (!isOneConstant(Node->getOperand(2)))
2661         break;
2662       selectVLOp(Node->getOperand(2), VL);
2663     } else
2664       selectVLOp(Node->getOperand(2), VL);
2665 
2666     unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2667     SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2668 
2669     // If VL=1, then we don't need to do a strided load and can just do a
2670     // regular load.
2671     bool IsStrided = !isOneConstant(VL);
2672 
2673     // Only do a strided load if we have optimized zero-stride vector load.
2674     if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2675       break;
2676 
2677     SmallVector<SDValue> Operands = {
2678         SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2679         Ld->getBasePtr()};
2680     if (IsStrided)
2681       Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2682     uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
2683     SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2684     Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2685 
2686     RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2687     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2688         /*IsMasked*/ false, IsStrided, /*FF*/ false,
2689         Log2SEW, static_cast<unsigned>(LMUL));
2690     MachineSDNode *Load =
2691         CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2692     // Update the chain.
2693     ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2694     // Record the mem-refs
2695     CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2696     // Replace the splat with the vlse.
2697     ReplaceNode(Node, Load);
2698     return;
2699   }
2700   case ISD::PREFETCH:
2701     unsigned Locality = Node->getConstantOperandVal(3);
2702     if (Locality > 2)
2703       break;
2704 
2705     auto *LoadStoreMem = cast<MemSDNode>(Node);
2706     MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2707     MMO->setFlags(MachineMemOperand::MONonTemporal);
2708 
2709     int NontemporalLevel = 0;
2710     switch (Locality) {
2711     case 0:
2712       NontemporalLevel = 3; // NTL.ALL
2713       break;
2714     case 1:
2715       NontemporalLevel = 1; // NTL.PALL
2716       break;
2717     case 2:
2718       NontemporalLevel = 0; // NTL.P1
2719       break;
2720     default:
2721       llvm_unreachable("unexpected locality value.");
2722     }
2723 
2724     if (NontemporalLevel & 0b1)
2725       MMO->setFlags(MONontemporalBit0);
2726     if (NontemporalLevel & 0b10)
2727       MMO->setFlags(MONontemporalBit1);
2728     break;
2729   }
2730 
2731   // Select the default instruction.
2732   SelectCode(Node);
2733 }
2734 
SelectInlineAsmMemoryOperand(const SDValue & Op,InlineAsm::ConstraintCode ConstraintID,std::vector<SDValue> & OutOps)2735 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2736     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2737     std::vector<SDValue> &OutOps) {
2738   // Always produce a register and immediate operand, as expected by
2739   // RISCVAsmPrinter::PrintAsmMemoryOperand.
2740   switch (ConstraintID) {
2741   case InlineAsm::ConstraintCode::o:
2742   case InlineAsm::ConstraintCode::m: {
2743     SDValue Op0, Op1;
2744     [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2745     assert(Found && "SelectAddrRegImm should always succeed");
2746     OutOps.push_back(Op0);
2747     OutOps.push_back(Op1);
2748     return false;
2749   }
2750   case InlineAsm::ConstraintCode::A:
2751     OutOps.push_back(Op);
2752     OutOps.push_back(
2753         CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2754     return false;
2755   default:
2756     report_fatal_error("Unexpected asm memory constraint " +
2757                        InlineAsm::getMemConstraintName(ConstraintID));
2758   }
2759 
2760   return true;
2761 }
2762 
SelectAddrFrameIndex(SDValue Addr,SDValue & Base,SDValue & Offset)2763 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2764                                              SDValue &Offset) {
2765   if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2766     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2767     Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2768     return true;
2769   }
2770 
2771   return false;
2772 }
2773 
// Fold constant addresses.
//
// If Addr is a ConstantSDNode, split it into a materialized Base and a
// simm12 Offset so the caller can fold the low 12 bits into a load/store
// (or prefetch) immediate. Returns false if Addr is not a constant or no
// profitable split exists. When IsPrefetch is set, only offsets whose low
// 5 bits are zero are accepted (see the `Lo12 & 0b11111` checks below).
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
                               const MVT VT, const RISCVSubtarget *Subtarget,
                               SDValue Addr, SDValue &Base, SDValue &Offset,
                               bool IsPrefetch = false) {
  if (!isa<ConstantSDNode>(Addr))
    return false;

  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();

  // If the constant is a simm12, we can fold the whole constant and use X0 as
  // the base. If the constant can be materialized with LUI+simm12, use LUI as
  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
  // Split CVal as Hi + Lo12 where Lo12 is the sign-extended low 12 bits, so
  // Hi is a multiple of 4096 (the LUI granule).
  int64_t Lo12 = SignExtend64<12>(CVal);
  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
    // Prefetch offsets must have their low 5 bits clear.
    if (IsPrefetch && (Lo12 & 0b11111) != 0)
      return false;
    if (Hi) {
      int64_t Hi20 = (Hi >> 12) & 0xfffff;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
          0);
    } else {
      // Whole constant fits in simm12: base is the zero register.
      Base = CurDAG->getRegister(RISCV::X0, VT);
    }
    Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
    return true;
  }

  // Ask how constant materialization would handle this constant.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);

  // If the last instruction would be an ADDI, we can fold its immediate and
  // emit the rest of the sequence as the base.
  if (Seq.back().getOpcode() != RISCV::ADDI)
    return false;
  Lo12 = Seq.back().getImm();
  // Prefetch offsets must have their low 5 bits clear.
  if (IsPrefetch && (Lo12 & 0b11111) != 0)
    return false;

  // Drop the last instruction.
  Seq.pop_back();
  assert(!Seq.empty() && "Expected more instructions in sequence");

  // Materialize the remaining (shorter) sequence as the base register.
  Base = selectImmSeq(CurDAG, DL, VT, Seq);
  Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
  return true;
}
2824 
2825 // Is this ADD instruction only used as the base pointer of scalar loads and
2826 // stores?
isWorthFoldingAdd(SDValue Add)2827 static bool isWorthFoldingAdd(SDValue Add) {
2828   for (auto *User : Add->users()) {
2829     if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
2830         User->getOpcode() != ISD::ATOMIC_LOAD &&
2831         User->getOpcode() != ISD::ATOMIC_STORE)
2832       return false;
2833     EVT VT = cast<MemSDNode>(User)->getMemoryVT();
2834     if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2835         VT != MVT::f64)
2836       return false;
2837     // Don't allow stores of the value. It must be used as the address.
2838     if (User->getOpcode() == ISD::STORE &&
2839         cast<StoreSDNode>(User)->getValue() == Add)
2840       return false;
2841     if (User->getOpcode() == ISD::ATOMIC_STORE &&
2842         cast<AtomicSDNode>(User)->getVal() == Add)
2843       return false;
2844     if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
2845       return false;
2846   }
2847 
2848   return true;
2849 }
2850 
SelectAddrRegImm(SDValue Addr,SDValue & Base,SDValue & Offset)2851 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2852                                          SDValue &Offset) {
2853   if (SelectAddrFrameIndex(Addr, Base, Offset))
2854     return true;
2855 
2856   SDLoc DL(Addr);
2857   MVT VT = Addr.getSimpleValueType();
2858 
2859   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2860     Base = Addr.getOperand(0);
2861     Offset = Addr.getOperand(1);
2862     return true;
2863   }
2864 
2865   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2866     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2867     if (isInt<12>(CVal) && isInt<12>(CVal)) {
2868       Base = Addr.getOperand(0);
2869       if (Base.getOpcode() == RISCVISD::ADD_LO) {
2870         SDValue LoOperand = Base.getOperand(1);
2871         if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2872           // If the Lo in (ADD_LO hi, lo) is a global variable's address
2873           // (its low part, really), then we can rely on the alignment of that
2874           // variable to provide a margin of safety before low part can overflow
2875           // the 12 bits of the load/store offset. Check if CVal falls within
2876           // that margin; if so (low part + CVal) can't overflow.
2877           const DataLayout &DL = CurDAG->getDataLayout();
2878           Align Alignment = commonAlignment(
2879               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2880           if ((CVal == 0 || Alignment > CVal)) {
2881             int64_t CombinedOffset = CVal + GA->getOffset();
2882             Base = Base.getOperand(0);
2883             Offset = CurDAG->getTargetGlobalAddress(
2884                 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2885                 CombinedOffset, GA->getTargetFlags());
2886             return true;
2887           }
2888         }
2889       }
2890 
2891       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2892         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2893       Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2894       return true;
2895     }
2896   }
2897 
2898   // Handle ADD with large immediates.
2899   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2900     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2901     assert(!isInt<12>(CVal) && "simm12 not already handled?");
2902 
2903     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2904     // an ADDI for part of the offset and fold the rest into the load/store.
2905     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2906     if (CVal >= -4096 && CVal <= 4094) {
2907       int64_t Adj = CVal < 0 ? -2048 : 2047;
2908       Base = SDValue(
2909           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2910                                  CurDAG->getSignedTargetConstant(Adj, DL, VT)),
2911           0);
2912       Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
2913       return true;
2914     }
2915 
2916     // For larger immediates, we might be able to save one instruction from
2917     // constant materialization by folding the Lo12 bits of the immediate into
2918     // the address. We should only do this if the ADD is only used by loads and
2919     // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2920     // separately with the full materialized immediate creating extra
2921     // instructions.
2922     if (isWorthFoldingAdd(Addr) &&
2923         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2924                            Offset, /*IsPrefetch=*/false)) {
2925       // Insert an ADD instruction with the materialized Hi52 bits.
2926       Base = SDValue(
2927           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2928           0);
2929       return true;
2930     }
2931   }
2932 
2933   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
2934                          /*IsPrefetch=*/false))
2935     return true;
2936 
2937   Base = Addr;
2938   Offset = CurDAG->getTargetConstant(0, DL, VT);
2939   return true;
2940 }
2941 
2942 /// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
SelectAddrRegImm9(SDValue Addr,SDValue & Base,SDValue & Offset)2943 bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
2944                                           SDValue &Offset) {
2945   // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only
2946   // a 9-bit immediate can be folded.
2947 
2948   SDLoc DL(Addr);
2949   MVT VT = Addr.getSimpleValueType();
2950 
2951   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2952     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2953     if (isUInt<9>(CVal)) {
2954       Base = Addr.getOperand(0);
2955 
2956       // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only
2957       // a 9-bit immediate can be folded.
2958       Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
2959       return true;
2960     }
2961   }
2962 
2963   Base = Addr;
2964   Offset = CurDAG->getTargetConstant(0, DL, VT);
2965   return true;
2966 }
2967 
2968 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2969 /// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);

      // Early-out if not a valid offset.
      // The folded offset must have its low 5 bits clear; otherwise keep the
      // whole address in Base and use a zero offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
    // one instruction by folding adjustment (-2048 or 2016) into the address.
    // Both adjustments are multiples of 32, so the folded Offset keeps its low
    // 5 bits clear; the remainder (a simm12) goes into an ADDI on the base.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base =
          SDValue(CurDAG->getMachineNode(
                      RISCV::ADDI, DL, VT, Addr.getOperand(0),
                      CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
                  0);
      Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, /*IsPrefetch=*/true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/true))
    return true;

  // Fallback: no split possible, use the whole address with a zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}
3034 
bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                              unsigned MaxShiftAmount,
                                              SDValue &Base, SDValue &Index,
                                              SDValue &Scale) {
  EVT VT = Addr.getSimpleValueType();
  // Peel a constant left shift off of N. Index gets the (possibly unshifted)
  // value, Shift gets the shift amount as a target constant. Returns true
  // only when a non-zero shift in range was matched.
  auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
                                              SDValue &Shift) {
    uint64_t ShiftAmt = 0;
    Index = N;

    if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
      // Only match shifts by a value in range [0, MaxShiftAmount].
      if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
        Index = N.getOperand(0);
        ShiftAmt = N.getConstantOperandVal(1);
      }
    }

    Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
    return ShiftAmt != 0;
  };

  if (Addr.getOpcode() == ISD::ADD) {
    if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue AddrB = Addr.getOperand(0);
      if (AddrB.getOpcode() == ISD::ADD &&
          UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
          !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
          isInt<12>(C1->getSExtValue())) {
        // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
        // Fold the simm12 C1 into a fresh ADDI on B so the scaled index
        // survives as the Index/Scale pair.
        SDValue C1Val =
            CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
                                              AddrB.getOperand(1), C1Val),
                       0);
        return true;
      }
    } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
      // Shift found on the LHS: the RHS is the base register.
      Base = Addr.getOperand(1);
      return true;
    } else {
      // No shift on the LHS: try the RHS. Even if no shift is found this
      // still succeeds as a reg+reg form with Scale == 0.
      UnwrapShl(Addr.getOperand(1), Index, Scale);
      Base = Addr.getOperand(0);
      return true;
    }
  }

  return false;
}
3084 
SelectAddrRegReg(SDValue Addr,SDValue & Base,SDValue & Offset)3085 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3086                                          SDValue &Offset) {
3087   if (Addr.getOpcode() != ISD::ADD)
3088     return false;
3089 
3090   if (isa<ConstantSDNode>(Addr.getOperand(1)))
3091     return false;
3092 
3093   Base = Addr.getOperand(0);
3094   Offset = Addr.getOperand(1);
3095   return true;
3096 }
3097 
/// Select the shift-amount operand for a shift of width \p ShiftWidth.
/// Strips wrappers (zext, redundant AND masks) and rewrites ADD/SUB-based
/// amounts that are congruent mod ShiftWidth. Always returns true; \p ShAmt
/// receives the value to use as the shift amount.
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  ShAmt = N;

  // Peek through zext.
  if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
    ShAmt = ShAmt.getOperand(0);

  // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (ShAmt.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(AndMask)) {
      // The AND keeps every bit the shift reads, so it is redundant.
      ShAmt = ShAmt.getOperand(0);
    } else {
      // SimplifyDemandedBits may have optimized the mask so try restoring any
      // bits that are known zero.
      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
      if (!ShMask.isSubsetOf(AndMask | Known.Zero))
        return true;
      ShAmt = ShAmt.getOperand(0);
    }
  }

  if (ShAmt.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(1);
    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
    // to avoid the ADD.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      ShAmt = ShAmt.getOperand(0);
      return true;
    }
  } else if (ShAmt.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(ShAmt.getOperand(0))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
    // generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                  ShAmt.getOperand(1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
    // to generate a NOT instead of a SUB of a constant.
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      MachineSDNode *Not = CurDAG->getMachineNode(
          RISCV::XORI, DL, VT, ShAmt.getOperand(1),
          CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  // Nothing else matched: use the (possibly unwrapped) value as-is.
  return true;
}
3169 
3170 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
3171 /// check for equality with 0. This function emits instructions that convert the
3172 /// seteq/setne into something that can be compared with 0.
3173 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
3174 /// ISD::SETNE).
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
                                    SDValue &Val) {
  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
         "Unexpected condition code!");

  // We're looking for a setcc.
  if (N->getOpcode() != ISD::SETCC)
    return false;

  // Must be an equality comparison.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CCVal != ExpectedCCVal)
    return false;

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.getValueType().isScalarInteger())
    return false;

  // If the RHS side is 0, we don't need any extra instructions, return the LHS.
  if (isNullConstant(RHS)) {
    Val = LHS;
    return true;
  }

  SDLoc DL(N);

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t CVal = C->getSExtValue();
    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
    // non-zero otherwise. (The addi trick below can't negate -2048 since
    // +2048 is not a simm12.)
    if (CVal == -2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::XORI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
    // LHS is equal to the RHS and non-zero otherwise.
    if (isInt<12>(CVal) || CVal == 2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::ADDI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    // Power-of-2 RHS with Zbs: flip that single bit, yielding 0 iff equal.
    if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::BINVI, DL, N->getValueType(0), LHS,
              CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
          0);
      return true;
    }
    // Same as the addi case above but for larger immediates (signed 26-bit) use
    // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid
    // anything which can be done with a single lui as it might be compressible.
    if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
        (CVal & 0xFFF) != 0) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
  }

  // If nothing else we can XOR the LHS and RHS to produce zero if they are
  // equal and a non-zero value if they aren't.
  Val = SDValue(
      CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
  return true;
}
3253 
/// Match \p N as a value already sign-extended from \p Bits bits, returning
/// the (possibly unwrapped) value in \p Val.
bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  // An explicit sign_extend_inreg from exactly Bits bits: use its input.
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
    Val = N.getOperand(0);
    return true;
  }

  // Peel a matched (sra (shl X, ShiftAmt), ShiftAmt) pair — the shift-based
  // form of sign extension. Returns N unchanged if the pattern doesn't match.
  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
      return N;

    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N.getConstantOperandVal(1) == ShiftAmt &&
        N0.getConstantOperandVal(1) == ShiftAmt)
      return N0.getOperand(0);

    return N;
  };

  MVT VT = N.getSimpleValueType();
  // If enough sign bits are known, N already behaves as a Bits-bit
  // sign-extended value.
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
    return true;
  }

  return false;
}
3282 
selectZExtBits(SDValue N,unsigned Bits,SDValue & Val)3283 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
3284   if (N.getOpcode() == ISD::AND) {
3285     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3286     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
3287       Val = N.getOperand(0);
3288       return true;
3289     }
3290   }
3291   MVT VT = N.getSimpleValueType();
3292   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
3293   if (CurDAG->MaskedValueIsZero(N, Mask)) {
3294     Val = N;
3295     return true;
3296   }
3297 
3298   return false;
3299 }
3300 
3301 /// Look for various patterns that can be done with a SHL that can be folded
3302 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
3303 /// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    if (bool LeftShift = N0.getOpcode() == ISD::SHL;
        (LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      // Drop bits the inner shift already guarantees are zero so Mask only
      // reflects bits that can actually be set.
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        // The mask's trailing zero count must equal the SHXADD scale being
        // matched.
        if (Trailing != ShAmt)
          return false;

        unsigned Opcode;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        if (LeftShift && Leading == 0 && C2 < Trailing)
          Opcode = RISCV::SRLI;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
          Opcode = RISCV::SRLIW;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == C2)
          Opcode = RISCV::SRLI;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == 32 + C2)
          Opcode = RISCV::SRLIW;
        else
          return false;

        SDLoc DL(N);
        EVT VT = N.getValueType();
        // Reuse ShAmt as the computed SRLI/SRLIW shift amount.
        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
        Val = SDValue(
            CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
                                   CurDAG->getTargetConstant(ShAmt, DL, VT)),
            0);
        return true;
      }
    } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
               isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
      // the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          // First shrink the arithmetic shift to c2 - c3 ...
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRAI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
                        0);
          // ... then apply the logical shift by c3 + c4.
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, Val,
                            CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
             (LeftShift || N.getOpcode() == ISD::SRL) &&
             isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N0.getConstantOperandVal(1);
      if (isShiftedMask_64(Mask)) {
        unsigned C1 = N.getConstantOperandVal(1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
3430 
3431 /// Look for various patterns that can be done with a SHL that can be folded
3432 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3433 /// SHXADD_UW we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
                                          SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
      N.hasOneUse()) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.hasOneUse()) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      // Drop bits the inner shift already guarantees are zero.
      Mask &= maskTrailingZeros<uint64_t>(C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = llvm::countl_zero(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SLLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}
3467 
orDisjoint(const SDNode * N) const3468 bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
3469   assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
3470   if (N->getFlags().hasDisjoint())
3471     return true;
3472   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
3473 }
3474 
selectImm64IfCheaper(int64_t Imm,int64_t OrigImm,SDValue N,SDValue & Val)3475 bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
3476                                              SDValue N, SDValue &Val) {
3477   int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
3478                                             /*CompressionCost=*/true);
3479   int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
3480                                         /*CompressionCost=*/true);
3481   if (OrigCost <= Cost)
3482     return false;
3483 
3484   Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
3485   return true;
3486 }
3487 
selectZExtImm32(SDValue N,SDValue & Val)3488 bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
3489   if (!isa<ConstantSDNode>(N))
3490     return false;
3491   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3492   if ((Imm >> 31) != 1)
3493     return false;
3494 
3495   for (const SDNode *U : N->users()) {
3496     switch (U->getOpcode()) {
3497     case ISD::ADD:
3498       break;
3499     case ISD::OR:
3500       if (orDisjoint(U))
3501         break;
3502       return false;
3503     default:
3504       return false;
3505     }
3506   }
3507 
3508   return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
3509 }
3510 
selectNegImm(SDValue N,SDValue & Val)3511 bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
3512   if (!isa<ConstantSDNode>(N))
3513     return false;
3514   int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
3515   if (isInt<32>(Imm))
3516     return false;
3517 
3518   for (const SDNode *U : N->users()) {
3519     switch (U->getOpcode()) {
3520     case ISD::ADD:
3521       break;
3522     case RISCVISD::VMV_V_X_VL:
3523       if (!all_of(U->users(), [](const SDNode *V) {
3524             return V->getOpcode() == ISD::ADD ||
3525                    V->getOpcode() == RISCVISD::ADD_VL;
3526           }))
3527         return false;
3528       break;
3529     default:
3530       return false;
3531     }
3532   }
3533 
3534   return selectImm64IfCheaper(-Imm, Imm, N, Val);
3535 }
3536 
/// Match a constant that is profitable to materialize inverted. Every user
/// must be a logic op that the available extensions can rewrite to consume
/// the inverted immediate; \p Val receives the materialized ~Imm.
bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();

  // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
  // That requires the low 12 bits to be all ones (so ~Imm has them all
  // zero), and excludes -1, which would invert to zero.
  if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
    return false;

  // Abandon this transform if the constant is needed elsewhere.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      // Scalar logic users need the inverted ops (ANDN/ORN/XNOR) from Zbb or
      // Zbkb.
      if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
        return false;
      break;
    case RISCVISD::VMV_V_X_VL:
      // A vector splat user requires Zvkb, and every use of the splat must be
      // an AND (scalar or VL form).
      if (!Subtarget->hasStdExtZvkb())
        return false;
      if (!all_of(U->users(), [](const SDNode *V) {
            return V->getOpcode() == ISD::AND ||
                   V->getOpcode() == RISCVISD::AND_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  if (isInt<32>(Imm)) {
    Val =
        selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
    return true;
  }

  // For 64-bit constants, the instruction sequences get complex,
  // so we select inverted only if it's cheaper.
  return selectImm64IfCheaper(~Imm, Imm, N, Val);
}
3579 
// Return true if the RVV pseudo \p User only demands at most \p Bits low bits
// of its scalar operand \p UserOpNo.
static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
                                        unsigned Bits,
                                        const TargetInstrInfo *TII) {
  // Map the pseudo back to its MC opcode; non-RVV machine opcodes map to 0.
  unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());

  if (!MCOpcode)
    return false;

  const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
  const uint64_t TSFlags = MCID.TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return false;
  assert(RISCVII::hasVLOp(TSFlags));

  // Locate the VL operand: counting from the end, skip the optional chain,
  // the optional policy operand, and the SEW operand (which sits at VLIdx+1).
  unsigned ChainOpIdx = User->getNumOperands() - 1;
  bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
  const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);

  // The VL operand itself demands all of its bits.
  if (UserOpNo == VLIdx)
    return false;

  auto NumDemandedBits =
      RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
  return NumDemandedBits && Bits >= *NumDemandedBits;
}
3607 
3608 // Return true if all users of this SDNode* only consume the lower \p Bits.
3609 // This can be used to form W instructions for add/sub/mul/shl even when the
3610 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3611 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
3612 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3613 // the add/sub/mul/shl to become non-W instructions. By checking the users we
3614 // may be able to use a W instruction and CSE with the other instruction if
3615 // this has happened. We could try to detect that the CSE opportunity exists
3616 // before doing this, but that would be more complicated.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  // Only the node kinds the PatFrags register this predicate for are expected
  // at the top level; recursive calls (Depth != 0) may see anything.
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  // Conservatively give up instead of recursing forever through user chains.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
    return false;

  // Every user must be shown to demand no more than the low Bits bits of this
  // node's result; a single unknown user defeats the transform.
  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
        break;
      return false;
    // *W instructions, and FP conversions/moves that read a 32-bit GPR, only
    // use the low 32 bits of their scalar inputs.
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
        break;
      return false;
    case RISCV::ANDI:
      // ANDI masks to the width of its immediate; if the mask is narrower
      // than Bits we are done, otherwise fall back to checking ANDI's users.
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      // ORI forces the bits covered by the immediate's ones to 1, so only the
      // remaining low bits of the input matter.
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    // Bitwise ops and shXadd propagate demanded bits unchanged; recurse into
    // their users with the same Bits.
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    // sext.b/packh read only the low byte of their input.
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    // These read only the low halfword.
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      // pack reads XLen/2 bits from each source.
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    // Narrow stores only demand the stored value's low bits (operand 0 is the
    // value being stored).
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    }
  }

  return true;
}
3779 
3780 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
selectSimm5Shl2(SDValue N,SDValue & Simm5,SDValue & Shl2)3781 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3782                                         SDValue &Shl2) {
3783   auto *C = dyn_cast<ConstantSDNode>(N);
3784   if (!C)
3785     return false;
3786 
3787   int64_t Offset = C->getSExtValue();
3788   for (unsigned Shift = 0; Shift < 4; Shift++) {
3789     if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
3790       EVT VT = N->getValueType(0);
3791       Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
3792       Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
3793       return true;
3794     }
3795   }
3796 
3797   return false;
3798 }
3799 
3800 // Select VL as a 5 bit immediate or a value that will become a register. This
3801 // allows us to choose between VSETIVLI or VSETVLI later.
selectVLOp(SDValue N,SDValue & VL)3802 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3803   auto *C = dyn_cast<ConstantSDNode>(N);
3804   if (C && isUInt<5>(C->getZExtValue())) {
3805     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3806                                    N->getValueType(0));
3807   } else if (C && C->isAllOnes()) {
3808     // Treat all ones as VLMax.
3809     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3810                                          N->getValueType(0));
3811   } else if (isa<RegisterSDNode>(N) &&
3812              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3813     // All our VL operands use an operand that allows GPRNoX0 or an immediate
3814     // as the register class. Convert X0 to a special immediate to pass the
3815     // MachineVerifier. This is recognized specially by the vsetvli insertion
3816     // pass.
3817     VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3818                                          N->getValueType(0));
3819   } else {
3820     VL = N;
3821   }
3822 
3823   return true;
3824 }
3825 
findVSplat(SDValue N)3826 static SDValue findVSplat(SDValue N) {
3827   if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3828     if (!N.getOperand(0).isUndef())
3829       return SDValue();
3830     N = N.getOperand(1);
3831   }
3832   SDValue Splat = N;
3833   if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3834        Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3835       !Splat.getOperand(0).isUndef())
3836     return SDValue();
3837   assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3838   return Splat;
3839 }
3840 
selectVSplat(SDValue N,SDValue & SplatVal)3841 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3842   SDValue Splat = findVSplat(N);
3843   if (!Splat)
3844     return false;
3845 
3846   SplatVal = Splat.getOperand(1);
3847   return true;
3848 }
3849 
// Match a splat of a constant scalar. The constant is truncated/sign-extended
// to the vector element width before being handed to ValidateImm; on success
// the (optionally decremented) value is returned as a target constant of
// XLenVT in SplatVal.
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm,
                                  bool Decrement = false) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  if (!ValidateImm(SplatImm))
    return false;

  // Callers matching "imm + 1" patterns ask for the decremented value.
  if (Decrement)
    SplatImm -= 1;

  SplatVal =
      DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
  return true;
}
3884 
selectVSplatSimm5(SDValue N,SDValue & SplatVal)3885 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3886   return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3887                                [](int64_t Imm) { return isInt<5>(Imm); });
3888 }
3889 
selectVSplatSimm5Plus1(SDValue N,SDValue & SplatVal)3890 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3891   return selectVSplatImmHelper(
3892       N, SplatVal, *CurDAG, *Subtarget,
3893       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
3894       /*Decrement=*/true);
3895 }
3896 
selectVSplatSimm5Plus1NoDec(SDValue N,SDValue & SplatVal)3897 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
3898   return selectVSplatImmHelper(
3899       N, SplatVal, *CurDAG, *Subtarget,
3900       [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
3901       /*Decrement=*/false);
3902 }
3903 
selectVSplatSimm5Plus1NonZero(SDValue N,SDValue & SplatVal)3904 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3905                                                       SDValue &SplatVal) {
3906   return selectVSplatImmHelper(
3907       N, SplatVal, *CurDAG, *Subtarget,
3908       [](int64_t Imm) {
3909         return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3910       },
3911       /*Decrement=*/true);
3912 }
3913 
selectVSplatUimm(SDValue N,unsigned Bits,SDValue & SplatVal)3914 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3915                                          SDValue &SplatVal) {
3916   return selectVSplatImmHelper(
3917       N, SplatVal, *CurDAG, *Subtarget,
3918       [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3919 }
3920 
selectVSplatImm64Neg(SDValue N,SDValue & SplatVal)3921 bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
3922   SDValue Splat = findVSplat(N);
3923   return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
3924 }
3925 
selectLow8BitsVSplat(SDValue N,SDValue & SplatVal)3926 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3927   auto IsExtOrTrunc = [](SDValue N) {
3928     switch (N->getOpcode()) {
3929     case ISD::SIGN_EXTEND:
3930     case ISD::ZERO_EXTEND:
3931     // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3932     // inactive elements will be undef.
3933     case RISCVISD::TRUNCATE_VECTOR_VL:
3934     case RISCVISD::VSEXT_VL:
3935     case RISCVISD::VZEXT_VL:
3936       return true;
3937     default:
3938       return false;
3939     }
3940   };
3941 
3942   // We can have multiple nested nodes, so unravel them all if needed.
3943   while (IsExtOrTrunc(N)) {
3944     if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3945       return false;
3946     N = N->getOperand(0);
3947   }
3948 
3949   return selectVSplat(N, SplatVal);
3950 }
3951 
// Match an FP value that can be produced from an XLen integer: either an
// existing int->FP bitcast/move, or an FP constant whose bit pattern can be
// materialized with scalar integer instructions.
bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
  // Allow bitcasts from XLenVT -> FP.
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
    Imm = N.getOperand(0);
    return true;
  }
  // Allow moves from XLenVT to FP.
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(0);
    return true;
  }

  // Otherwise, look for FP constants that can materialized with scalar int.
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  MVT XLenVT = Subtarget->getXLenVT();
  // An f64 bit pattern does not fit in a single 32-bit GPR, so it cannot be
  // materialized this way on RV32.
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  // Materialize the raw bit pattern of the constant as an integer.
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}
3987 
selectRVVSimm5(SDValue N,unsigned Width,SDValue & Imm)3988 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3989                                        SDValue &Imm) {
3990   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3991     int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3992 
3993     if (!isInt<5>(ImmVal))
3994       return false;
3995 
3996     Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
3997                                           Subtarget->getXLenVT());
3998     return true;
3999   }
4000 
4001   return false;
4002 }
4003 
// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    // Build the W form with the same operands and replace all uses of the
    // sext.w with it.
    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    // An i32-typed result is not the XLen-wide sign-extended form this
    // peephole handles; leave it alone.
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}
4072 
usesAllOnesMask(SDValue MaskOp)4073 static bool usesAllOnesMask(SDValue MaskOp) {
4074   const auto IsVMSet = [](unsigned Opc) {
4075     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
4076            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
4077            Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
4078            Opc == RISCV::PseudoVMSET_M_B8;
4079   };
4080 
4081   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
4082   // undefined behaviour if it's the wrong bitwidth, so we could choose to
4083   // assume that it's all-ones? Same applies to its VL.
4084   return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
4085 }
4086 
isImplicitDef(SDValue V)4087 static bool isImplicitDef(SDValue V) {
4088   if (!V.isMachineOpcode())
4089     return false;
4090   if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
4091     for (unsigned I = 1; I < V.getNumOperands(); I += 2)
4092       if (!isImplicitDef(V.getOperand(I)))
4093         return false;
4094     return true;
4095   }
4096   return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
4097 }
4098 
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  // Only pseudos listed in the masked-pseudo table have an unmasked twin.
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else.  See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);

  // Sanity-check the structural relationship between the two pseudo forms.
  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
         "Unexpected pseudo structure");
  assert(!(HasPassthru && !MaskedHasPassthru) &&
         "Unmasked pseudo has passthru but masked pseudo doesn't?");

  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if the unmasked don't have one.
  bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
  // Drop the trailing policy operand if only the masked form carries one.
  bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
                    RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
  // A chain, if present, is always the very last operand.
  bool HasChainOp =
      N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
  unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx)
      continue;
    if (DropPolicy && I == LastOpNum)
      continue;
    Ops.push_back(Op);
  }

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  // Preserve memory operands and node flags on the replacement.
  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}
4157 
/// If our passthru is an implicit_def, use noreg instead.  This side
/// steps issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  // Walk all nodes bottom-up so replacements don't invalidate the iterator.
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // Only RVV pseudos whose first def is tied to an implicit_def passthru
    // qualify for the NoRegister substitution.
    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    // Rebuild the node with NoRegister in the passthru slot and the
    // remaining operands unchanged.
    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}
4194 
4195 
4196 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
4197 // for instruction scheduling.
createRISCVISelDag(RISCVTargetMachine & TM,CodeGenOptLevel OptLevel)4198 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4199                                        CodeGenOptLevel OptLevel) {
4200   return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4201 }
4202 
// Pass identification token for the legacy pass manager.
char RISCVDAGToDAGISelLegacy::ID = 0;

// The legacy-PM wrapper simply owns a RISCVDAGToDAGISel instance and
// delegates to the common SelectionDAGISelLegacy machinery.
RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4211