//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVInstrInfo.h"
#include "RISCVSelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

static cl::opt<bool> UsePseudoMovImm(
    "riscv-use-rematerializable-movimm", cl::Hidden,
    cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
             "constant materialization"),
    cl::init(false));

#define GET_DAGISEL_BODY RISCVDAGToDAGISel
#include "RISCVGenDAGISel.inc"
void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue Src = N->getOperand(0);
      if (VT.isInteger())
        Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
                              N->getOperand(0));
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
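      // Illustratively (a sketch, not verbatim output), the final RV32
      // sequence looks like:
      //   sw  lo, 0(sp)
      //   sw  hi, 4(sp)
      //   vlse64.v vd, (sp), zero   ; x0 stride broadcasts the 64-bit element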
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create a temporary stack slot for each node being expanded.
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    case ISD::FP_EXTEND: {
      // We only have vector patterns for riscv_fpextend_vl in isel.
      SDLoc DL(N);
      MVT VT = N->getSimpleValueType(0);
      if (!VT.isVector())
        break;
      SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      SDValue TrueMask = CurDAG->getNode(
          RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX);
      Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0),
                               TrueMask, VLMAX);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);

    // FIXME: This is here only because the VMerge transform doesn't
    // know how to handle masked true inputs. Once that has been moved
    // to post-ISEL, this can be deleted as well.
    MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
  }

  CurDAG->setRoot(Dummy.getValue());

  // After we're done with everything else, convert IMPLICIT_DEF
  // passthru operands to NoRegister. This is required to work around
  // an optimization deficiency in MachineCSE. This really should
  // be merged back into each of the patterns (i.e. there's no good
  // reason not to go directly to NoReg), but is being done this way
  // to allow easy backporting.
  MadeChange |= doPeepholeNoRegPassThru();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}

static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);

  // Use a rematerializable pseudo instruction for short sequences if enabled.
  if (Seq.size() == 2 && UsePseudoMovImm)
    return SDValue(
        CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
                               CurDAG->getSignedTargetConstant(Imm, DL, VT)),
        0);

  // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bits 31 and 63 are set.
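  // Illustrative example (not from the source): Imm = 0x00FF00FF00FF00FF
  // takes around seven instructions with generateInstSeq, but X = 0x00FF00FF
  // is just LUI+ADDI, so (ADD X, (SLLI X, 32)) materializes it in four.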
  if (Seq.size() > 3) {
    unsigned ShiftAmt, AddOpc;
    RISCVMatInt::InstSeq SeqLo =
        RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
    if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
      SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

      SDValue SLLI = SDValue(
          CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
          0);
      return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
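  // The operand list built below is, in order:
  //   {base, [stride/index,] [mask,] vl, sew, [policy,] chain}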
  SDValue Chain = Node->getOperand(0);

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    SDValue Mask = Node->getOperand(CurOp++);
    Operands.push_back(Mask);
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands,
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF,
                                      bool IsMasked) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL
  ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1);
  RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  Operands.push_back(Node->getOperand(CurOp++));

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

#ifndef NDEBUG
  // Number of elements = RVVBitsPerBlock * LMUL / SEW
  unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW;
  auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL);
  if (DecodedLMUL.second)
    ContainedTyNumElts /= DecodedLMUL.first;
  else
    ContainedTyNumElts *= DecodedLMUL.first;
  assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() &&
         "Element count mismatch");
#endif

  RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
    if (auto VLEN = Subtarget->getRealVLen())
      if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
        VLMax = true;
  }
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
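  // e.g. (and (shl X, 8), 0xFF00): 0xFF00 does not fit in a 12-bit
  // immediate, but 0xFF does, so emit (slli (andi X, 0xFF), 8) instead.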
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR: BinOpc = RISCV::ORI; break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp = CurDAG->getMachineNode(
      BinOpc, DL, VT, Shift.getOperand(0),
      CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb())
    Opc = RISCV::TH_EXT;
  else if (Subtarget->hasVendorXAndesPerf())
    Opc = RISCV::NDS_BFOS;
  else if (Subtarget->hasVendorXqcibm())
    Opc = RISCV::QC_EXT;
  else
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                             const SDLoc &DL, MVT VT) {
    if (Opc == RISCV::QC_EXT) {
      // QC.EXT X, width, shamt
      // shamt is the same as Lsb
      // width is the number of bits to extract from the Lsb
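      // e.g. extracting bits [15:8]: Msb = 15, Lsb = 8 encodes as
      // width = 15 - 8 + 1 = 8, shamt = 8.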
      Msb = Msb - Lsb + 1;
    }
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (SignedBitfieldExtract X, msb, lsb)
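  // e.g. on RV32, (sra (shl X, 8), 12) extracts bits [23:4]:
  // msb = 32 - 8 - 1 = 23, lsb = 12 - 8 = 4.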
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount cannot be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (SignedBitfieldExtract X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    // If the shift-right amount is greater than Msb, the shift extracts the
    // X[Msb] bit and sign-extends it.
    const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt;

    SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbe);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) {
  // Supported only in Xqcibm for now.
  if (!Subtarget->hasVendorXqcibm())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  int32_t C1 = N1C->getSExtValue();
  if (!isShiftedMask_32(C1) || isInt<12>(C1))
    return false;

  // INSBI will clobber the input register in N0. Bail out if we need a copy to
  // preserve this value.
  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  // If C1 is a shifted mask (but can't be formed as an ORI),
  // use a bitfield insert of -1.
  // Transform (or x, C1)
  //        -> (qc.insbi x, -1, width, shift)
  const unsigned Leading = llvm::countl_zero((uint32_t)C1);
  const unsigned Trailing = llvm::countr_zero((uint32_t)C1);
  const unsigned Width = 32 - Leading - Trailing;
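  // e.g. C1 = 0x00FFF000: Leading = 8, Trailing = 12, Width = 12, giving
  // (qc.insbi x, -1, 12, 12).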

  // If Zbs is enabled and it is a single bit set we can use BSETI which
  // can be compressed to C_BSETI when Xqcibm is enabled.
  if (Width == 1 && Subtarget->hasStdExtZbs())
    return false;

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT),
                   CurDAG->getTargetConstant(Width, DL, VT),
                   CurDAG->getTargetConstant(Trailing, DL, VT)};
  SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops);
  ReplaceNode(Node, BitIns);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb,
                            const SDLoc &DL, MVT VT) {
    unsigned Opc = RISCV::NDS_BFOS;
    // If the Lsb is equal to the Msb, then the Lsb should be 0.
    if (Lsb == Msb)
      Lsb = 0;
    return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Lsb, DL, VT),
                                  CurDAG->getTargetConstant(Msb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 > C2
  //        -> (NDS.BFOS X, lsb, msb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield insertion (i.e., the shift-right
    // amount should be less than the left-shift).
    if (LeftShAmt <= RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = LeftShAmt - RightShAmt;

    SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, Sbi);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
                                                   const SDLoc &DL, MVT VT,
                                                   SDValue X, unsigned Msb,
                                                   unsigned Lsb) {
  unsigned Opc;

  if (Subtarget->hasVendorXTHeadBb()) {
    Opc = RISCV::TH_EXTU;
  } else if (Subtarget->hasVendorXAndesPerf()) {
    Opc = RISCV::NDS_BFOZ;
  } else if (Subtarget->hasVendorXqcibm()) {
    Opc = RISCV::QC_EXTU;
    // QC.EXTU X, width, shamt
    // shamt is the same as Lsb
    // width is the number of bits to extract from the Lsb
    Msb = Msb - Lsb + 1;
  } else {
    // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment.
    return false;
  }

  SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X,
                                       CurDAG->getTargetConstant(Msb, DL, VT),
                                       CurDAG->getTargetConstant(Lsb, DL, VT));
  ReplaceNode(Node, Ube);
  return true;
}

bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node,
                                                        const SDLoc &DL, MVT VT,
                                                        SDValue X, unsigned Msb,
                                                        unsigned Lsb) {
  // Only supported with XAndesPerf at the moment.
  if (!Subtarget->hasVendorXAndesPerf())
    return false;

  unsigned Opc = RISCV::NDS_BFOZ;

  // If the Lsb is equal to the Msb, then the Lsb should be 0.
  if (Lsb == Msb)
    Lsb = 0;
  SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X,
                                       CurDAG->getTargetConstant(Lsb, DL, VT),
                                       CurDAG->getTargetConstant(Msb, DL, VT));
  ReplaceNode(Node, Ubi);
  return true;
}

bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
         "Unexpected addressing mode");
  bool IsPre = AM == ISD::PRE_INC;
  bool IsPost = AM == ISD::POST_INC;
  int64_t Offset = C->getSExtValue();

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
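  // e.g. Offset = 48 encodes as imm5 = 12, imm2 = 2, since 48 = 12 << 2;
  // Offset = 17 cannot be encoded (no shift of a simm5 produces it).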
  unsigned Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {
      Ld->getBasePtr(),
      CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
      CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}

void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");

  SDLoc DL(Node);
  unsigned IntNo = Node->getConstantOperandVal(1);

  assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
          IntNo == Intrinsic::riscv_sf_vc_i_se) &&
903 "Unexpected vsetvli intrinsic");

  // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
  unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
  SDValue SEWOp =
      CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
  SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
                                      Node->getOperand(4), Node->getOperand(5),
                                      Node->getOperand(8), SEWOp,
                                      Node->getOperand(0)};

  unsigned Opcode;
  auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
  switch (LMulSDNode->getSExtValue()) {
  case 5:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8
                                                  : RISCV::PseudoSF_VC_I_SE_MF8;
    break;
  case 6:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4
                                                  : RISCV::PseudoSF_VC_I_SE_MF4;
    break;
  case 7:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2
                                                  : RISCV::PseudoSF_VC_I_SE_MF2;
    break;
  case 0:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1
                                                  : RISCV::PseudoSF_VC_I_SE_M1;
    break;
  case 1:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2
                                                  : RISCV::PseudoSF_VC_I_SE_M2;
    break;
  case 2:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4
                                                  : RISCV::PseudoSF_VC_I_SE_M4;
    break;
  case 3:
    Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M8
                                                  : RISCV::PseudoSF_VC_I_SE_M8;
    break;
  }

  ReplaceNode(Node, CurDAG->getMachineNode(
                        Opcode, DL, Node->getSimpleValueType(0), Operands));
}

static unsigned getSegInstNF(unsigned Intrinsic) {
#define INST_NF_CASE(NAME, NF) \
  case Intrinsic::riscv_##NAME##NF: \
    return NF;
#define INST_NF_CASE_MASK(NAME, NF) \
  case Intrinsic::riscv_##NAME##NF##_mask: \
    return NF;
#define INST_NF_CASE_FF(NAME, NF) \
  case Intrinsic::riscv_##NAME##NF##ff: \
    return NF;
#define INST_NF_CASE_FF_MASK(NAME, NF) \
  case Intrinsic::riscv_##NAME##NF##ff_mask: \
    return NF;
#define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \
  MACRO_NAME(NAME, 2) \
  MACRO_NAME(NAME, 3) \
  MACRO_NAME(NAME, 4) \
  MACRO_NAME(NAME, 5) \
  MACRO_NAME(NAME, 6) \
  MACRO_NAME(NAME, 7) \
  MACRO_NAME(NAME, 8)
#define INST_ALL_NF_CASE(NAME) \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME)
#define INST_ALL_NF_CASE_WITH_FF(NAME) \
  INST_ALL_NF_CASE(NAME) \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \
  INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME)
  switch (Intrinsic) {
  default:
    llvm_unreachable("Unexpected segment load/store intrinsic");
    INST_ALL_NF_CASE_WITH_FF(vlseg)
    INST_ALL_NF_CASE(vlsseg)
    INST_ALL_NF_CASE(vloxseg)
    INST_ALL_NF_CASE(vluxseg)
    INST_ALL_NF_CASE(vsseg)
    INST_ALL_NF_CASE(vssseg)
    INST_ALL_NF_CASE(vsoxseg)
    INST_ALL_NF_CASE(vsuxseg)
  }
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();

  switch (Opcode) {
  case ISD::Constant: {
    assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If only the lower 8 bits are used, try to convert this to a simm6 by
    // sign-extending bit 7. This is neutral without the C extension, and
    // allows C.LI to be used if C is present.
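    // e.g. Imm = 0xF0 with only byte-wide users: SignExtend64<8>(0xF0) = -16,
    // which fits in a simm6 and can be materialized with c.li.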
    if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
      Imm = SignExtend64<8>(Imm);
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();

    bool Is64Bit = Subtarget->is64Bit();
    bool HasZdinx = Subtarget->hasStdExtZdinx();

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64) {
      if (VT == MVT::f64 && HasZdinx && !Is64Bit)
        Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64);
      else
        Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    } else {
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);
    }

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::bf16:
      assert(Subtarget->hasStdExtZfbfmin());
      Opc = RISCV::FMV_H_X;
      break;
    case MVT::f16:
      Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, we need to convert instead. This
      // should only happen for +0.0 and -0.0.
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      if (HasZdinx)
        Opc = RISCV::COPY;
      else
        Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res;
    if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode();
    } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) {
      Res =
          CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode();
    } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
      Res = CurDAG->getMachineNode(
          Opc, DL, VT, Imm,
          CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
    else
      Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64) {
      Opc = RISCV::FSGNJN_D;
      if (HasZdinx)
        Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
      Res =
          CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
    }

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::BuildGPRPair:
  case RISCVISD::BuildPairF64: {
    if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx())
      break;

    assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) &&
           "BuildPairF64 only handled here on rv32i_zdinx");

    SDValue Ops[] = {
        CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
        Node->getOperand(0),
        CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
        Node->getOperand(1),
        CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

    SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops);
    ReplaceNode(Node, N);
    return;
  }
  case RISCVISD::SplitGPRPair:
  case RISCVISD::SplitF64: {
    if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) {
      assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) &&
             "SplitF64 only handled here on rv32i_zdinx");

      if (!SDValue(Node, 0).use_empty()) {
        SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
                                                    Node->getValueType(0),
                                                    Node->getOperand(0));
        ReplaceUses(SDValue(Node, 0), Lo);
      }

      if (!SDValue(Node, 1).use_empty()) {
        SDValue Hi = CurDAG->getTargetExtractSubreg(
            RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0));
        ReplaceUses(SDValue(Node, 1), Hi);
      }

      CurDAG->RemoveDeadNode(Node);
      return;
    }

    assert(Opcode != RISCVISD::SplitGPRPair &&
           "SplitGPRPair should already be handled");

    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    if (isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
        // where C2 has 32 leading zeros and C3 trailing zeros.
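        // e.g. (shl (and X, 0xFFFFFF00), 4) on RV64 becomes
        // (slli (srliw X, 8), 12).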
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
      if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
        // where C2 has C4 leading zeros and no trailing zeros.
        // This is profitable if the "and" was to be lowered to
        // (srli (slli X, C4), C4) and not (andi X, C2).
        // For "LeadingZeros == 32":
        // - with Zba it's just (slli.uw X, C)
        // - without Zba a tablegen pattern applies the very same
        //   transform as we would have done here
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(LeadingZeros, DL, VT));
        SDNode *SRLI = CurDAG->getMachineNode(
            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
            CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SRLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
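    // e.g. on RV64 (absent a vendor bitfield-extract instruction),
    // (srl (and X, 0xFFFF), 4) becomes (srli (slli X, 48), 52).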
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    const unsigned Msb = TrailingOnes - 1;
    const unsigned Lsb = ShAmt;
    if (tryUnsignedBitfieldExtract(Node, DL, VT, N0->getOperand(0), Msb, Lsb))
      return;

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    if (trySignedBitfieldInsertInSign(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
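    // e.g. on RV64, (sra (sext_inreg X, i8), 2) becomes
    // (srai (slli X, 56), 58).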
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR: {
    if (trySignedBitfieldInsertInMask(Node))
      return;

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      uint64_t C1 = N1C->getZExtValue();

      // Clear irrelevant bits in the mask.
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32).
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
      if (!LeftShift && isMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // Try to use an unsigned bitfield extract (e.g., th.extu) if
          // available.
          // Transform (and (srl x, C2), C1)
          //        -> (<bfextract> x, msb, lsb)
          //
          // Make sure to keep this below the SRLIW cases, as we always want to
          // prefer the more common instruction.
          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also Skip if we can use bexti or th.tst.
          Skip |= HasBitTest && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask
      // shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if
          // available.
          // Transform (and (shl x, c2), c1)
          //        -> (<bfinsert> x, msb, lsb)
          // e.g.
          //   (and (shl x, 12), 0x00fff000)
          //   If XLen = 32 and C2 = 12, then
          //   Msb = 32 - 8 - 1 = 23 and Lsb = 12
          const unsigned Msb = XLen - Leading - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c2+c3), c3)
1484 if (OneUseOrZExtW && !IsCANDI) {
1485 SDNode *SLLI = CurDAG->getMachineNode(
1486 RISCV::SLLI, DL, VT, X,
1487 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1488 SDNode *SRLI = CurDAG->getMachineNode(
1489 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1490 CurDAG->getTargetConstant(Leading, DL, VT));
1491 ReplaceNode(Node, SRLI);
1492 return;
1493 }
1494 }
1495 }
1496
1497 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1498 // shifted mask with c2 leading zeros and c3 trailing zeros.
1499 if (!LeftShift && isShiftedMask_64(C1)) {
1500 unsigned Leading = XLen - llvm::bit_width(C1);
1501 unsigned Trailing = llvm::countr_zero(C1);
1502 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1503 !IsCANDI) {
1504 unsigned SrliOpc = RISCV::SRLI;
1505 // If the input is zexti32 we should use SRLIW.
1506 if (X.getOpcode() == ISD::AND &&
1507 isa<ConstantSDNode>(X.getOperand(1)) &&
1508 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1509 SrliOpc = RISCV::SRLIW;
1510 X = X.getOperand(0);
1511 }
1512 SDNode *SRLI = CurDAG->getMachineNode(
1513 SrliOpc, DL, VT, X,
1514 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1515 SDNode *SLLI = CurDAG->getMachineNode(
1516 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1517 CurDAG->getTargetConstant(Trailing, DL, VT));
1518 ReplaceNode(Node, SLLI);
1519 return;
1520 }
1521 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1522 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1523 OneUseOrZExtW && !IsCANDI) {
1524 SDNode *SRLIW = CurDAG->getMachineNode(
1525 RISCV::SRLIW, DL, VT, X,
1526 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1527 SDNode *SLLI = CurDAG->getMachineNode(
1528 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1529 CurDAG->getTargetConstant(Trailing, DL, VT));
1530 ReplaceNode(Node, SLLI);
1531 return;
1532 }
1533 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1534 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen &&
1535 OneUseOrZExtW && Subtarget->hasStdExtZba()) {
1536 SDNode *SRLI = CurDAG->getMachineNode(
1537 RISCV::SRLI, DL, VT, X,
1538 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1539 SDNode *SLLI_UW = CurDAG->getMachineNode(
1540 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1541 CurDAG->getTargetConstant(Trailing, DL, VT));
1542 ReplaceNode(Node, SLLI_UW);
1543 return;
1544 }
1545 }
1546
1547 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1548 // shifted mask with no leading zeros and c3 trailing zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }

        // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
        if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
            Subtarget->hasStdExtZba()) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI_UW = CurDAG->getMachineNode(
              RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI_UW);
          return;
        }
      }
    }

    const uint64_t C1 = N1C->getZExtValue();

    if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.hasOneUse()) {
      unsigned C2 = N0.getConstantOperandVal(1);
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      SDValue X = N0.getOperand(0);

      // Prefer SRAIW + ANDI when possible.
      bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) &&
                  X.getOpcode() == ISD::SHL &&
                  isa<ConstantSDNode>(X.getOperand(1)) &&
                  X.getConstantOperandVal(1) == 32;
      // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a
      // mask with c3 leading zeros and c2 is larger than c3.
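      // For illustration (hypothetical values): with XLen = 64, c2 = 16 and
      // c1 = 0x00FFFFFFFFFFFFFF (a mask with c3 = 8 leading zeros),
      //   (and (sra x, 16), c1) -> (srli (srai x, 8), 8).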
      if (isMask_64(C1) && !Skip) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 > Leading) {
          SDNode *SRAI = CurDAG->getMachineNode(
              RISCV::SRAI, DL, VT, X,
              CurDAG->getTargetConstant(C2 - Leading, DL, VT));
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
              CurDAG->getTargetConstant(Leading, DL, VT));
          ReplaceNode(Node, SRLI);
          return;
        }
      }

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (slli (srli (srai y, c2 - c3), c3 + c4), c4).
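      // For illustration (hypothetical values): with XLen = 64, c2 = 16 and
      // c1 = 0x00FFFFFFFFFFFF00 (c3 = 8 leading, c4 = 8 trailing zeros),
      //   (and (sra y, 16), c1) -> (slli (srli (srai y, 8), 16), 8).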
      if (isShiftedMask_64(C1) && !Skip) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (C2 > Leading && Leading > 0 && Trailing > 0) {
          SDNode *SRAI = CurDAG->getMachineNode(
              RISCV::SRAI, DL, VT, N0.getOperand(0),
              CurDAG->getTargetConstant(C2 - Leading, DL, VT));
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, SDValue(SRAI, 0),
              CurDAG->getTargetConstant(Leading + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }
    }

    // If C1 masks off the upper bits only (but can't be formed as an
    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
    // available.
    // Transform (and x, C1)
    //        -> (<bfextract> x, msb, lsb)
    if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) {
      const unsigned Msb = llvm::bit_width(C1) - 1;
      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
        return;
    }

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.
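    // For illustration (hypothetical values): with XLen = 64 and C2 = 0xFFFF
    // (LeadingZeros = 48), (mul (and X, 0xFFFF), C1) becomes
    // (mulhu (slli X, 48), C1 << 16): (X << 48) * (C1 << 16) equals
    // ((X & 0xFFFF) * C1) << 64, so the product sits entirely in the upper
    // half, which is exactly what MULHU returns.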

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = N0.getConstantOperandVal(1);

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
    // multiple users or the constant is a simm12. This prevents inserting a
    // shift while still having uses of the AND/ZEXT. Shifting a simm12 will
    // likely make it more costly to materialize. Otherwise, using a SLLI
    // might allow it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
    // With XTHeadBb, we can use TH.EXTU.
    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;
    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
    // the constant is a simm32.
    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
    // With XTHeadBb, we can use TH.EXTU.
    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::LOAD: {
    if (tryIndexedLoad(Node))
      return;

    if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) {
      // We match post-incrementing loads here.
      LoadSDNode *Load = cast<LoadSDNode>(Node);
      if (Load->getAddressingMode() != ISD::POST_INC)
        break;

      SDValue Chain = Node->getOperand(0);
      SDValue Base = Node->getOperand(1);
      SDValue Offset = Node->getOperand(2);

      bool Simm12 = false;
      bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;

      if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
        int ConstantVal = ConstantOffset->getSExtValue();
        Simm12 = isInt<12>(ConstantVal);
        if (Simm12)
          Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
                                             Offset.getValueType());
      }

      unsigned Opcode = 0;
      switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
      case MVT::i8:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LBU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LB_rr_inc;
        else
          Opcode = RISCV::CV_LBU_rr_inc;
        break;
      case MVT::i16:
        if (Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_ri_inc;
        else if (Simm12 && !SignExtend)
          Opcode = RISCV::CV_LHU_ri_inc;
        else if (!Simm12 && SignExtend)
          Opcode = RISCV::CV_LH_rr_inc;
        else
          Opcode = RISCV::CV_LHU_rr_inc;
        break;
      case MVT::i32:
        if (Simm12)
          Opcode = RISCV::CV_LW_ri_inc;
        else
          Opcode = RISCV::CV_LW_rr_inc;
        break;
      default:
        break;
      }
      if (!Opcode)
        break;

      ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
                                               Chain.getSimpleValueType(), Base,
                                               Offset, Chain));
      return;
    }
    break;
  }
  case RISCVISD::LD_RV32: {
    assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd");

    SDValue Base, Offset;
    SDValue Chain = Node->getOperand(0);
    SDValue Addr = Node->getOperand(1);
    SelectAddrRegImm(Addr, Base, Offset);

    SDValue Ops[] = {Base, Offset, Chain};
    MachineSDNode *New = CurDAG->getMachineNode(
        RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL,
                                                MVT::i32, SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL,
                                                MVT::i32, SDValue(New, 0));
    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
    ReplaceUses(SDValue(Node, 0), Lo);
    ReplaceUses(SDValue(Node, 1), Hi);
    ReplaceUses(SDValue(Node, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case RISCVISD::SD_RV32: {
    SDValue Base, Offset;
    SDValue Chain = Node->getOperand(0);
    SDValue Addr = Node->getOperand(3);
    SelectAddrRegImm(Addr, Base, Offset);

    SDValue Lo = Node->getOperand(1);
    SDValue Hi = Node->getOperand(2);

    SDValue RegPair;
    // Peephole to use X0_Pair for storing zero.
    if (isNullConstant(Lo) && isNullConstant(Hi)) {
      RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped);
    } else {
      SDValue Ops[] = {
          CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo,
          CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi,
          CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};

      RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                               MVT::Untyped, Ops),
                        0);
    }

    MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other,
                                                {RegPair, Base, Offset, Chain});
    CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()});
    ReplaceUses(SDValue(Node, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      int64_t CVal = 0;
      MVT Src1VT = Src1.getSimpleValueType();
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              Src1VT.getScalarSizeInBits())
                                              .getSExtValue()) {
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
  case RISCVVType::lmulenum:                                                   \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix                 \
                             : RISCV::PseudoVMSGT_VX_##suffix;                 \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2)
        CASE_VMSLT_OPCODES(LMUL_1, M1)
        CASE_VMSLT_OPCODES(LMUL_2, M2)
        CASE_VMSLT_OPCODES(LMUL_4, M4)
        CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix)                            \
  case RISCVVType::lmulenum:                                                   \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix;                               \
    break;
        CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64)
        CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32)
        CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16)
        CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8)
        CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4)
        CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2)
        CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1)
#undef CASE_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsge(u) with minimum value, expand it to vmset.
      if (IsCmpMinimum) {
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW));
        return;
      }

      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);

        ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT,
                                                 {Src1, Imm, VL, SEW}));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpConstant = false;
      bool IsCmpMinimum = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      MVT Src1VT = Src1.getSimpleValueType();
      int64_t CVal = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        IsCmpConstant = true;
        CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpMinimum = true;
        } else if (!IsUnsigned && CVal == APInt::getSignedMinValue(
                                              Src1VT.getScalarSizeInBits())
                                              .getSExtValue()) {
          IsCmpMinimum = true;
        }
      }
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode, VMSGTMaskOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix)                                   \
  case RISCVVType::lmulenum:                                                   \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSGT_VX_##suffix##_MASK;      \
    break;
        CASE_VMSLT_OPCODES(LMUL_F8, MF8)
        CASE_VMSLT_OPCODES(LMUL_F4, MF4)
        CASE_VMSLT_OPCODES(LMUL_F2, MF2)
        CASE_VMSLT_OPCODES(LMUL_1, M1)
        CASE_VMSLT_OPCODES(LMUL_2, M2)
        CASE_VMSLT_OPCODES(LMUL_4, M4)
        CASE_VMSLT_OPCODES(LMUL_8, M8)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVVType::lmulenum:                                                   \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2)
        CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff.
      if (IsCmpMinimum) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value, use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      SDValue PolicyOp =
          CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT);

      if (IsCmpConstant) {
        SDValue Imm =
            selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget);

        ReplaceNode(Node, CurDAG->getMachineNode(
                              VMSGTMaskOpcode, DL, VT,
                              {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp}));
        return;
      }

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                                   {MaskedOff, Src1, Src2, Mask,
                                                    VL, SEW, PolicyOp}),
                            0);
      // vmxor.mm vd, vd, v0 is used to update the active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand at the IR level. In pseudos, it has both a policy and a
      // passthru operand. The passthru operand is needed to track the
      // "tail undefined" state, and the policy is there just for
      // consistency - it will always be "don't care" for the unmasked form.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      if (HasPassthruOperand)
        Operands.push_back(Node->getOperand(CurOp++));
      else {
        // We eagerly lower to implicit_def (instead of undef), as we
        // otherwise fail to select nodes such as: nxv1i1 = undef
        SDNode *Passthru =
            CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
        Operands.push_back(SDValue(Passthru, 0));
      }
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 7> Operands;
      Operands.push_back(Node->getOperand(CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load = CurDAG->getMachineNode(
          P->Pseudo, DL, Node->getVTList(), Operands);
      CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_nds_vln:
    case Intrinsic::riscv_nds_vln_mask:
    case Intrinsic::riscv_nds_vlnu:
    case Intrinsic::riscv_nds_vlnu_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask ||
                      IntNo == Intrinsic::riscv_nds_vlnu_mask;
      bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu ||
                        IntNo == Intrinsic::riscv_nds_vlnu_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;

      Operands.push_back(Node->getOperand(CurOp++));
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed=*/false, Operands,
                                 /*IsLoad=*/true);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo(
          IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
    case Intrinsic::riscv_vssseg8_mask: {
      selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                  /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vsoxseg2:
    case Intrinsic::riscv_vsoxseg3:
    case Intrinsic::riscv_vsoxseg4:
    case Intrinsic::riscv_vsoxseg5:
    case Intrinsic::riscv_vsoxseg6:
    case Intrinsic::riscv_vsoxseg7:
    case Intrinsic::riscv_vsoxseg8:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2:
    case Intrinsic::riscv_vsuxseg3:
    case Intrinsic::riscv_vsuxseg4:
    case Intrinsic::riscv_vsuxseg5:
    case Intrinsic::riscv_vsuxseg6:
    case Intrinsic::riscv_vsuxseg7:
    case Intrinsic::riscv_vsuxseg8:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxseg2_mask:
    case Intrinsic::riscv_vsoxseg3_mask:
    case Intrinsic::riscv_vsoxseg4_mask:
    case Intrinsic::riscv_vsoxseg5_mask:
    case Intrinsic::riscv_vsoxseg6_mask:
    case Intrinsic::riscv_vsoxseg7_mask:
    case Intrinsic::riscv_vsoxseg8_mask:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vsuxseg2_mask:
    case Intrinsic::riscv_vsuxseg3_mask:
    case Intrinsic::riscv_vsuxseg4_mask:
    case Intrinsic::riscv_vsuxseg5_mask:
    case Intrinsic::riscv_vsuxseg6_mask:
    case Intrinsic::riscv_vsuxseg7_mask:
    case Intrinsic::riscv_vsuxseg8_mask:
      selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true,
                   /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vsoxei:
    case Intrinsic::riscv_vsoxei_mask:
    case Intrinsic::riscv_vsuxei:
    case Intrinsic::riscv_vsuxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
                      IntNo == Intrinsic::riscv_vsuxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
                       IntNo == Intrinsic::riscv_vsoxei_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/false, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW,
          static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_vsm:
    case Intrinsic::riscv_vse:
    case Intrinsic::riscv_vse_mask:
    case Intrinsic::riscv_vsse:
    case Intrinsic::riscv_vsse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
                      IntNo == Intrinsic::riscv_vsse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;

      MVT VT = Node->getOperand(2)->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++)); // Store value.

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands);

      RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
          IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Store =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
      CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()});

      ReplaceNode(Node, Store);
      return;
    }
    case Intrinsic::riscv_sf_vc_x_se:
    case Intrinsic::riscv_sf_vc_i_se:
      selectSF_VC_X_SE(Node);
      return;
    }
    break;
  }
  case ISD::BITCAST: {
    MVT SrcVT = Node->getOperand(0).getSimpleValueType();
    // Just drop bitcasts between vectors if both are fixed or both are
    // scalable.
    if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
        (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR:
  case RISCVISD::TUPLE_INSERT: {
    SDValue V = Node->getOperand(0);
    SDValue SubV = Node->getOperand(1);
    SDLoc DL(SubV);
    auto Idx = Node->getConstantOperandVal(2);
    MVT SubVecVT = SubV.getSimpleValueType();

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = SubVecVT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (SubVecVT.isFixedLengthVector()) {
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
      TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
      [[maybe_unused]] bool ExactlyVecRegSized =
          Subtarget->expandVScale(SubVecVT.getSizeInBits())
              .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
      assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
                               .getKnownMinValue()));
      assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
    }
    MVT ContainerVT = VT;
    if (VT.isFixedLengthVector())
      ContainerVT = TLI.getContainerForFixedLengthVector(VT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            ContainerVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // insert which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    RISCVVType::VLMUL SubVecLMUL =
        RISCVTargetLowering::getLMUL(SubVecContainerVT);
    [[maybe_unused]] bool IsSubVecPartReg =
        SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 ||
        SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 ||
        SubVecLMUL == RISCVVType::VLMUL::LMUL_F8;
    assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg ||
            V.isUndef()) &&
           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
           "the subvector is smaller than a full-sized register");

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID =
          RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                               DL, VT, SubV, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
    ReplaceNode(Node, Insert.getNode());
    return;
  }
  case ISD::EXTRACT_SUBVECTOR:
  case RISCVISD::TUPLE_EXTRACT: {
    SDValue V = Node->getOperand(0);
    auto Idx = Node->getConstantOperandVal(1);
    MVT InVT = V.getSimpleValueType();
    SDLoc DL(V);

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = VT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (VT.isFixedLengthVector()) {
      assert(Idx == 0);
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
    }
    if (InVT.isFixedLengthVector())
      InVT = TLI.getContainerForFixedLengthVector(InVT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            InVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // extract which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode =
          CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
    ReplaceNode(Node, Extract.getNode());
    return;
  }
  case RISCVISD::VMV_S_X_VL:
  case RISCVISD::VFMV_S_F_VL:
  case RISCVISD::VMV_V_X_VL:
  case RISCVISD::VFMV_V_F_VL: {
    // Try to match splat of a scalar load to a strided load with stride of x0.
    bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                        Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
    if (!Node->getOperand(0).isUndef())
      break;
    SDValue Src = Node->getOperand(1);
    auto *Ld = dyn_cast<LoadSDNode>(Src);
    // Can't fold a load with an address update (indexed load): its second
    // output is used, so the load node can't be removed.
    if (!Ld || Ld->isIndexed())
      break;
    EVT MemVT = Ld->getMemoryVT();
    // The memory VT should be the same size as the element type.
    if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
      break;
    if (!IsProfitableToFold(Src, Node, Node) ||
        !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
      break;

    SDValue VL;
    if (IsScalarMove) {
      // We could deal with more VL if we update the VSETVLI insert pass to
      // avoid introducing more VSETVLI.
      if (!isOneConstant(Node->getOperand(2)))
        break;
      selectVLOp(Node->getOperand(2), VL);
    } else
      selectVLOp(Node->getOperand(2), VL);

    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);

    // If VL=1, then we don't need to do a strided load and can just do a
    // regular load.
    bool IsStrided = !isOneConstant(VL);

    // Only do a strided load if we have optimized zero-stride vector load.
    if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
      break;

    SmallVector<SDValue> Operands = {
        SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
        Ld->getBasePtr()};
    if (IsStrided)
      Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
    uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC;
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.append({VL, SEW, PolicyOp, Ld->getChain()});

    RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
        /*IsMasked*/ false, IsStrided, /*FF*/ false,
        Log2SEW, static_cast<unsigned>(LMUL));
    MachineSDNode *Load =
        CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
    // Update the chain.
    ReplaceUses(Src.getValue(1), SDValue(Load, 1));
    // Record the mem-refs.
    CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
    // Replace the splat with the vlse.
    ReplaceNode(Node, Load);
    return;
  }
  case ISD::PREFETCH:
    unsigned Locality = Node->getConstantOperandVal(3);
    if (Locality > 2)
      break;

    auto *LoadStoreMem = cast<MemSDNode>(Node);
    MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
    MMO->setFlags(MachineMemOperand::MONonTemporal);

    int NontemporalLevel = 0;
    switch (Locality) {
    case 0:
      NontemporalLevel = 3; // NTL.ALL
      break;
    case 1:
      NontemporalLevel = 1; // NTL.PALL
      break;
    case 2:
      NontemporalLevel = 0; // NTL.P1
      break;
    default:
      llvm_unreachable("unexpected locality value.");
    }

    if (NontemporalLevel & 0b1)
      MMO->setFlags(MONontemporalBit0);
    if (NontemporalLevel & 0b10)
      MMO->setFlags(MONontemporalBit1);
    break;
  }

  // Select the default instruction.
  SelectCode(Node);
}

bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  // Always produce a register and immediate operand, as expected by
  // RISCVAsmPrinter::PrintAsmMemoryOperand.
  switch (ConstraintID) {
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::m: {
    SDValue Op0, Op1;
    [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
    assert(Found && "SelectAddrRegImm should always succeed");
    OutOps.push_back(Op0);
    OutOps.push_back(Op1);
    return false;
  }
  case InlineAsm::ConstraintCode::A:
    OutOps.push_back(Op);
    OutOps.push_back(
        CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
    return false;
  default:
    report_fatal_error("Unexpected asm memory constraint " +
                       InlineAsm::getMemConstraintName(ConstraintID));
  }

  return true;
}

bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
                                             SDValue &Offset) {
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Fold constant addresses.
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
                               const MVT VT, const RISCVSubtarget *Subtarget,
                               SDValue Addr, SDValue &Base, SDValue &Offset,
                               bool IsPrefetch = false) {
  if (!isa<ConstantSDNode>(Addr))
    return false;

  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();

  // If the constant is a simm12, we can fold the whole constant and use X0 as
  // the base. If the constant can be materialized with LUI+simm12, use LUI as
  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
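  // For illustration (hypothetical value): CVal = 0x12345678 splits into
  // Lo12 = 0x678 and Hi = 0x12345000, so the address becomes
  // LUI base, 0x12345 plus a load/store offset of 0x678.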
  int64_t Lo12 = SignExtend64<12>(CVal);
  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
    if (IsPrefetch && (Lo12 & 0b11111) != 0)
      return false;
    if (Hi) {
      int64_t Hi20 = (Hi >> 12) & 0xfffff;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
          0);
    } else {
      Base = CurDAG->getRegister(RISCV::X0, VT);
    }
    Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
    return true;
  }

  // Ask how constant materialization would handle this constant.
  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);

  // If the last instruction would be an ADDI, we can fold its immediate and
  // emit the rest of the sequence as the base.
  if (Seq.back().getOpcode() != RISCV::ADDI)
    return false;
  Lo12 = Seq.back().getImm();
  if (IsPrefetch && (Lo12 & 0b11111) != 0)
    return false;

  // Drop the last instruction.
  Seq.pop_back();
  assert(!Seq.empty() && "Expected more instructions in sequence");

  Base = selectImmSeq(CurDAG, DL, VT, Seq);
  Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT);
  return true;
}

// Is this ADD instruction only used as the base pointer of scalar loads and
// stores?
static bool isWorthFoldingAdd(SDValue Add) {
  for (auto *User : Add->users()) {
    if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
        User->getOpcode() != ISD::ATOMIC_LOAD &&
        User->getOpcode() != ISD::ATOMIC_STORE)
      return false;
    EVT VT = cast<MemSDNode>(User)->getMemoryVT();
    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
        VT != MVT::f64)
      return false;
    // Don't allow stores of the value. It must be used as the address.
    if (User->getOpcode() == ISD::STORE &&
        cast<StoreSDNode>(User)->getValue() == Add)
      return false;
    if (User->getOpcode() == ISD::ATOMIC_STORE &&
        cast<AtomicSDNode>(User)->getVal() == Add)
      return false;
    if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
      return false;
  }

  return true;
}

bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
    Base = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);
      if (Base.getOpcode() == RISCVISD::ADD_LO) {
        SDValue LoOperand = Base.getOperand(1);
        if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
          // If the Lo in (ADD_LO hi, lo) is a global variable's address
          // (its low part, really), then we can rely on the alignment of that
          // variable to provide a margin of safety before the low part can
          // overflow the 12 bits of the load/store offset. Check if CVal falls
          // within that margin; if so, (low part + CVal) can't overflow.
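          // For illustration (hypothetical values): for a global aligned to
          // 16 bytes, the %lo part has its low 4 bits clear, so any CVal in
          // [0, 15] can be folded into the offset without carrying into %hi.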
          const DataLayout &DL = CurDAG->getDataLayout();
          Align Alignment = commonAlignment(
              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
          if (CVal == 0 || Alignment > CVal) {
            int64_t CombinedOffset = CVal + GA->getOffset();
            Base = Base.getOperand(0);
            Offset = CurDAG->getTargetGlobalAddress(
                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
                CombinedOffset, GA->getTargetFlags());
            return true;
          }
        }
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
    // an ADDI for part of the offset and fold the rest into the load/store.
    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
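    // For illustration (hypothetical value): CVal = 3000 splits into
    // Adj = 2047 and CVal - Adj = 953, i.e. ADDI base, base, 2047 followed by
    // a load/store offset of 953 -- both fit in a simm12.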
    if (CVal >= -4096 && CVal <= 4094) {
      int64_t Adj = CVal < 0 ? -2048 : 2047;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
                                 CurDAG->getSignedTargetConstant(Adj, DL, VT)),
          0);
      Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT);
      return true;
    }

    // For larger immediates, we might be able to save one instruction from
    // constant materialization by folding the Lo12 bits of the immediate into
    // the address. We should only do this if the ADD is only used by loads and
    // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
    // separately with the full materialized immediate creating extra
    // instructions.
    if (isWorthFoldingAdd(Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, /*IsPrefetch=*/false)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/false))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

/// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9.
bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only
  // a 9-bit immediate can be folded.

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isUInt<9>(CVal)) {
      Base = Addr.getOperand(0);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
/// Offset should be all zeros.
bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
                                                 SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);

      // Early-out if not a valid offset.
      if ((CVal & 0b11111) != 0) {
        Base = Addr;
        Offset = CurDAG->getTargetConstant(0, DL, VT);
        return true;
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can
    // save one instruction by folding an adjustment (-2048 or 2016) into the
    // address.
    if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
      int64_t Adj = CVal < 0 ? -2048 : 2016;
      int64_t AdjustedOffset = CVal - Adj;
      Base =
          SDValue(CurDAG->getMachineNode(
                      RISCV::ADDI, DL, VT, Addr.getOperand(0),
                      CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)),
                  0);
      Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT);
      return true;
    }

    if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset, /*IsPrefetch=*/true)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset,
                         /*IsPrefetch=*/true))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                              unsigned MaxShiftAmount,
                                              SDValue &Base, SDValue &Index,
                                              SDValue &Scale) {
  EVT VT = Addr.getSimpleValueType();
  auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
                                              SDValue &Shift) {
    uint64_t ShiftAmt = 0;
    Index = N;

    if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
      // Only match shifts by a value in range [0, MaxShiftAmount].
      if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
        Index = N.getOperand(0);
        ShiftAmt = N.getConstantOperandVal(1);
      }
    }

    Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
    return ShiftAmt != 0;
  };

  if (Addr.getOpcode() == ISD::ADD) {
    if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
      SDValue AddrB = Addr.getOperand(0);
      if (AddrB.getOpcode() == ISD::ADD &&
          UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
          !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
          isInt<12>(C1->getSExtValue())) {
        // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
        SDValue C1Val =
            CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
        Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
                                              AddrB.getOperand(1), C1Val),
                       0);
        return true;
      }
    } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
      Base = Addr.getOperand(1);
      return true;
    } else {
      UnwrapShl(Addr.getOperand(1), Index, Scale);
      Base = Addr.getOperand(0);
      return true;
    }
  }

  return false;
}
3084
SelectAddrRegReg(SDValue Addr,SDValue & Base,SDValue & Offset)3085 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
3086 SDValue &Offset) {
3087 if (Addr.getOpcode() != ISD::ADD)
3088 return false;
3089
3090 if (isa<ConstantSDNode>(Addr.getOperand(1)))
3091 return false;
3092
3093 Base = Addr.getOperand(0);
3094 Offset = Addr.getOperand(1);
3095 return true;
3096 }
3097
selectShiftMask(SDValue N,unsigned ShiftWidth,SDValue & ShAmt)3098 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
3099 SDValue &ShAmt) {
3100 ShAmt = N;
3101
3102 // Peek through zext.
3103 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
3104 ShAmt = ShAmt.getOperand(0);
3105
3106 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
3107 // amount. If there is an AND on the shift amount, we can bypass it if it
3108 // doesn't affect any of those bits.
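  // For example, with ShiftWidth == 64, (srl X, (and Y, 63)) reads only the
  // low six bits of the amount, so Y can be used directly.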
  if (ShAmt.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(AndMask)) {
      ShAmt = ShAmt.getOperand(0);
    } else {
      // SimplifyDemandedBits may have optimized the mask so try restoring any
      // bits that are known zero.
      KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
      if (!ShMask.isSubsetOf(AndMask | Known.Zero))
        return true;
      ShAmt = ShAmt.getOperand(0);
    }
  }

  if (ShAmt.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(ShAmt.getOperand(1))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(1);
    // If we are shifting by X+N where N == 0 mod Size, then just shift by X
    // to avoid the ADD.
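    // E.g. with ShiftWidth == 64, (shl A, (add X, 64)) shifts by the same
    // amount as (shl A, X) because only the low six bits are read.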
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      ShAmt = ShAmt.getOperand(0);
      return true;
    }
  } else if (ShAmt.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(ShAmt.getOperand(0))) {
    uint64_t Imm = ShAmt.getConstantOperandVal(0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
    // to generate a NEG instead of a SUB of a constant.
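    // E.g. with ShiftWidth == 64, (sll A, (sub 64, X)) shifts by (-X) mod 64,
    // so the constant 64 never needs to be materialized.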
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                  ShAmt.getOperand(1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
    // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
    // to generate a NOT instead of a SUB of a constant.
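    // E.g. with ShiftWidth == 64, (63 - X) == ~X modulo 64 by the identity
    // ~X == -X - 1, so an XORI with -1 suffices.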
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      MachineSDNode *Not = CurDAG->getMachineNode(
          RISCV::XORI, DL, VT, ShAmt.getOperand(1),
          CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  return true;
}

/// RISC-V doesn't have general instructions for integer setne/seteq, but we
/// can check for equality with 0. This function emits instructions that
/// convert the seteq/setne into something that can be compared with 0.
/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
/// ISD::SETNE).
bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
                                    SDValue &Val) {
  assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
         "Unexpected condition code!");

  // We're looking for a setcc.
  if (N->getOpcode() != ISD::SETCC)
    return false;

  // Must be an equality comparison.
  ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CCVal != ExpectedCCVal)
    return false;

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  if (!LHS.getValueType().isScalarInteger())
    return false;

  // If the RHS is 0, we don't need any extra instructions; return the LHS.
  if (isNullConstant(RHS)) {
    Val = LHS;
    return true;
  }

  SDLoc DL(N);

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t CVal = C->getSExtValue();
    // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048
    // and non-zero otherwise.
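    // XORI is needed here because ADDI with the negated constant would
    // require +2048, which does not fit in a simm12.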
    if (CVal == -2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::XORI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    // If the RHS is in the range [-2047, 2048], we can use addi with -RHS to
    // produce 0 if the LHS is equal to the RHS and non-zero otherwise.
    if (isInt<12>(CVal) || CVal == 2048) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::ADDI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
    if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::BINVI, DL, N->getValueType(0), LHS,
              CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))),
          0);
      return true;
    }
    // Same as the addi case above but for larger immediates (signed 26-bit),
    // use the QC_E_ADDI instruction from the Xqcilia extension, if available.
    // Avoid anything which can be done with a single lui as it might be
    // compressible.
    if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) &&
        (CVal & 0xFFF) != 0) {
      Val = SDValue(
          CurDAG->getMachineNode(
              RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS,
              CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))),
          0);
      return true;
    }
  }

  // If nothing else we can XOR the LHS and RHS to produce zero if they are
  // equal and a non-zero value if they aren't.
  Val = SDValue(
      CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
  return true;
}

bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
    Val = N.getOperand(0);
    return true;
  }

  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
      return N;

    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N.getConstantOperandVal(1) == ShiftAmt &&
        N0.getConstantOperandVal(1) == ShiftAmt)
      return N0.getOperand(0);

    return N;
  };

  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    if (bool LeftShift = N0.getOpcode() == ISD::SHL;
        (LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (Trailing != ShAmt)
          return false;

        unsigned Opcode;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
        // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
        // followed by a SHXADD with c3 for the X amount.
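        // E.g. for SH3ADD (ShAmt == 3): (and (shl y, 1), ~0x7) has no leading
        // zeros and three trailing zeros, and equals (shl (srl y, 2), 3), so
        // an SRLI by 2 produces the scaled operand of the SH3ADD.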
        if (LeftShift && Leading == 0 && C2 < Trailing)
          Opcode = RISCV::SRLI;
        // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2
        // followed by a SHXADD with c3 for the X amount.
        else if (LeftShift && Leading == 32 - C2 && C2 < Trailing)
          Opcode = RISCV::SRLIW;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == C2)
          Opcode = RISCV::SRLI;
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2
        // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3
        // followed by a SHXADD using c3 for the X amount.
        else if (!LeftShift && Leading == 32 + C2)
          Opcode = RISCV::SRLIW;
        else
          return false;

        SDLoc DL(N);
        EVT VT = N.getValueType();
        ShAmt = LeftShift ? Trailing - C2 : Trailing + C2;
        Val = SDValue(
            CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0),
                                   CurDAG->getTargetConstant(ShAmt, DL, VT)),
            0);
        return true;
      }
    } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() &&
               isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3
      // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can
      // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as
      // the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (C2 > Leading && Leading > 0 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRAI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - Leading, DL, VT)),
                        0);
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, Val,
                            CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  } else if (bool LeftShift = N.getOpcode() == ISD::SHL;
             (LeftShift || N.getOpcode() == ISD::SRL) &&
             isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N0.getConstantOperandVal(1);
      if (isShiftedMask_64(Mask)) {
        unsigned C1 = N.getConstantOperandVal(1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD_UW we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
                                          SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
      N.hasOneUse()) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.hasOneUse()) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      Mask &= maskTrailingZeros<uint64_t>(C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
      // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
      // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
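      // E.g. for SH2ADD_UW (ShAmt == 2): with c2 == 4 and c1 == 0x3FFFFFFF0
      // (30 leading zeros, 4 trailing zeros), (and (shl y, 4), 0x3FFFFFFF0)
      // == (zext32(y << 2) << 2), so a single SLLI by 2 produces the
      // SH2ADD_UW operand.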
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = llvm::countl_zero(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SLLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}

bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const {
  assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL);
  if (N->getFlags().hasDisjoint())
    return true;
  return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}

bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm,
                                             SDValue N, SDValue &Val) {
  int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget,
                                            /*CompressionCost=*/true);
  int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget,
                                        /*CompressionCost=*/true);
  if (OrigCost <= Cost)
    return false;

  Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((Imm >> 31) != 1)
    return false;

  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::ADD:
      break;
    case ISD::OR:
      if (orDisjoint(U))
        break;
      return false;
    default:
      return false;
    }
  }

  return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val);
}

bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();
  if (isInt<32>(Imm))
    return false;

  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::ADD:
      break;
    case RISCVISD::VMV_V_X_VL:
      if (!all_of(U->users(), [](const SDNode *V) {
            return V->getOpcode() == ISD::ADD ||
                   V->getOpcode() == RISCVISD::ADD_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  return selectImm64IfCheaper(-Imm, Imm, N, Val);
}

bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) {
  if (!isa<ConstantSDNode>(N))
    return false;
  int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue();

  // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI.
  if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1))
    return false;

  // Abandon this transform if the constant is needed elsewhere.
  for (const SDNode *U : N->users()) {
    switch (U->getOpcode()) {
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()))
        return false;
      break;
    case RISCVISD::VMV_V_X_VL:
      if (!Subtarget->hasStdExtZvkb())
        return false;
      if (!all_of(U->users(), [](const SDNode *V) {
            return V->getOpcode() == ISD::AND ||
                   V->getOpcode() == RISCVISD::AND_VL;
          }))
        return false;
      break;
    default:
      return false;
    }
  }

  if (isInt<32>(Imm)) {
    Val =
        selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget);
    return true;
  }

  // For 64-bit constants, the instruction sequences get complex, so we select
  // the inverted constant only if it's cheaper to materialize.
  return selectImm64IfCheaper(~Imm, Imm, N, Val);
}

static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
                                        unsigned Bits,
                                        const TargetInstrInfo *TII) {
  unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());

  if (!MCOpcode)
    return false;

  const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
  const uint64_t TSFlags = MCID.TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return false;
  assert(RISCVII::hasVLOp(TSFlags));

  unsigned ChainOpIdx = User->getNumOperands() - 1;
  bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
  unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2;
  const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);

  if (UserOpNo == VLIdx)
    return false;

  auto NumDemandedBits =
      RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
  return NumDemandedBits && Bits >= *NumDemandedBits;
}

// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
// the add/sub/mul/shl to become non-W instructions. By checking the users we
// may be able to use a W instruction and CSE with the other instruction if
// this has happened. We could try to detect that the CSE opportunity exists
// before doing this, but that would be more complicated.
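// For example, an (add X, Y) whose only user is a SW store consumes just the
// low 32 bits, so it can safely be selected as ADDW.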
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
    return false;

  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected.
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
        break;
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(XLen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
        break;
      return false;
    case RISCV::ANDI:
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as
      // an N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
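// For example, 96 == 12 << 3 with isInt<5>(12), so Simm5 = 12 and Shl2 = 3.
// The loop below returns the smallest shift that works: 96 >> 0, 96 >> 1, and
// 96 >> 2 don't fit in a simm5, but 96 >> 3 does.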
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (!C)
    return false;

  int64_t Offset = C->getSExtValue();
  for (unsigned Shift = 0; Shift < 4; Shift++) {
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
      EVT VT = N->getValueType(0);
      Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
      Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
      return true;
    }
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
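// E.g. a constant VL of 8 stays a uimm5 immediate (VSETIVLI), while X0 or an
// all-ones constant is canonicalized to VLMaxSentinel.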
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else {
    VL = N;
  }

  return true;
}

static SDValue findVSplat(SDValue N) {
  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
    if (!N.getOperand(0).isUndef())
      return SDValue();
    N = N.getOperand(1);
  }
  SDValue Splat = N;
  if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
       Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
      !Splat.getOperand(0).isUndef())
    return SDValue();
  assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
  return Splat;
}

bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  if (!Splat)
    return false;

  SplatVal = Splat.getOperand(1);
  return true;
}

static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm,
                                  bool Decrement = false) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  if (!ValidateImm(SplatImm))
    return false;

  if (Decrement)
    SplatImm -= 1;

  SplatVal =
      DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
  return true;
}

bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
                               [](int64_t Imm) { return isInt<5>(Imm); });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
      /*Decrement=*/true);
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N,
                                                    SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
      /*Decrement=*/false);
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      },
      /*Decrement=*/true);
}

bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
                                         SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}

bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
}

bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
  auto IsExtOrTrunc = [](SDValue N) {
    switch (N->getOpcode()) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    // There's no passthru on these _VL nodes so any VL/mask is ok, since any
    // inactive elements will be undef.
    case RISCVISD::TRUNCATE_VECTOR_VL:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return true;
    default:
      return false;
    }
  };

  // We can have multiple nested nodes, so unravel them all if needed.
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
  // Allow bitcasts from XLenVT -> FP.
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
    Imm = N.getOperand(0);
    return true;
  }
  // Allow moves from XLenVT to FP.
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(0);
    return true;
  }

  // Otherwise, look for FP constants that can be materialized with a scalar
  // int.
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
                                          Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
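// E.g. (ADDIW (ADD a, b), 0) is replaced by (ADDW a, b): the sext.w
// disappears and the add becomes its own sign-extending W form.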
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended; just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

static bool usesAllOnesMask(SDValue MaskOp) {
  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
}

static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions.
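// For instance, a masked pseudo such as PseudoVADD_VV_M1_MASK whose mask
// operand comes from a VMSET becomes the unmasked PseudoVADD_VV_M1, with the
// mask (and, if the unmasked form lacks them, the passthru and policy
// operands) dropped.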
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);

  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
         "Unexpected pseudo structure");
  assert(!(HasPassthru && !MaskedHasPassthru) &&
         "Unmasked pseudo has passthru but masked pseudo doesn't?");

  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have
  // one.
  bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
  bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
                    RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
  bool HasChainOp =
      N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
  unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx)
      continue;
    if (DropPolicy && I == LastOpNum)
      continue;
    Ops.push_back(Op);
  }

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

/// If our passthru is an implicit_def, use noreg instead. This sidesteps
/// issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
}

char RISCVDAGToDAGISelLegacy::ID = 0;

RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
                                                 CodeGenOptLevel OptLevel)
    : SelectionDAGISelLegacy(
          ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}

INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)