1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the RISC-V target.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "RISCVISelDAGToDAG.h"
14 #include "MCTargetDesc/RISCVBaseInfo.h"
15 #include "MCTargetDesc/RISCVMCTargetDesc.h"
16 #include "MCTargetDesc/RISCVMatInt.h"
17 #include "RISCVISelLowering.h"
18 #include "RISCVMachineFunctionInfo.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/IR/IntrinsicsRISCV.h"
21 #include "llvm/Support/Alignment.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/raw_ostream.h"
25
26 using namespace llvm;
27
28 #define DEBUG_TYPE "riscv-isel"
29 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
30
31 static cl::opt<bool> UsePseudoMovImm(
32 "riscv-use-rematerializable-movimm", cl::Hidden,
33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
34 "constant materialization"),
35 cl::init(false));
36
37 namespace llvm::RISCV {
38 #define GET_RISCVVSSEGTable_IMPL
39 #define GET_RISCVVLSEGTable_IMPL
40 #define GET_RISCVVLXSEGTable_IMPL
41 #define GET_RISCVVSXSEGTable_IMPL
42 #define GET_RISCVVLETable_IMPL
43 #define GET_RISCVVSETable_IMPL
44 #define GET_RISCVVLXTable_IMPL
45 #define GET_RISCVVSXTable_IMPL
46 #include "RISCVGenSearchableTables.inc"
47 } // namespace llvm::RISCV
48
PreprocessISelDAG()49 void RISCVDAGToDAGISel::PreprocessISelDAG() {
50 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
51
52 bool MadeChange = false;
53 while (Position != CurDAG->allnodes_begin()) {
54 SDNode *N = &*--Position;
55 if (N->use_empty())
56 continue;
57
58 SDValue Result;
59 switch (N->getOpcode()) {
60 case ISD::SPLAT_VECTOR: {
61 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
62 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
63 MVT VT = N->getSimpleValueType(0);
64 unsigned Opc =
65 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
66 SDLoc DL(N);
67 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
68 SDValue Src = N->getOperand(0);
69 if (VT.isInteger())
70 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
71 N->getOperand(0));
72 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
73 break;
74 }
75 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
76 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
77 // load. Done after lowering and combining so that we have a chance to
78 // optimize this to VMV_V_X_VL when the upper bits aren't needed.
79 assert(N->getNumOperands() == 4 && "Unexpected number of operands");
80 MVT VT = N->getSimpleValueType(0);
81 SDValue Passthru = N->getOperand(0);
82 SDValue Lo = N->getOperand(1);
83 SDValue Hi = N->getOperand(2);
84 SDValue VL = N->getOperand(3);
85 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
86 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
87 "Unexpected VTs!");
88 MachineFunction &MF = CurDAG->getMachineFunction();
89 SDLoc DL(N);
90
91 // Create temporary stack for each expanding node.
92 SDValue StackSlot =
93 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
94 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
95 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
96
97 SDValue Chain = CurDAG->getEntryNode();
98 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
99
100 SDValue OffsetSlot =
101 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
102 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
103 Align(8));
104
105 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
106
107 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
108 SDValue IntID =
109 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
110 SDValue Ops[] = {Chain,
111 IntID,
112 Passthru,
113 StackSlot,
114 CurDAG->getRegister(RISCV::X0, MVT::i64),
115 VL};
116
117 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
118 MVT::i64, MPI, Align(8),
119 MachineMemOperand::MOLoad);
120 break;
121 }
122 }
123
124 if (Result) {
125 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
126 LLVM_DEBUG(N->dump(CurDAG));
127 LLVM_DEBUG(dbgs() << "\nNew: ");
128 LLVM_DEBUG(Result->dump(CurDAG));
129 LLVM_DEBUG(dbgs() << "\n");
130
131 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
132 MadeChange = true;
133 }
134 }
135
136 if (MadeChange)
137 CurDAG->RemoveDeadNodes();
138 }
139
PostprocessISelDAG()140 void RISCVDAGToDAGISel::PostprocessISelDAG() {
141 HandleSDNode Dummy(CurDAG->getRoot());
142 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
143
144 bool MadeChange = false;
145 while (Position != CurDAG->allnodes_begin()) {
146 SDNode *N = &*--Position;
147 // Skip dead nodes and any non-machine opcodes.
148 if (N->use_empty() || !N->isMachineOpcode())
149 continue;
150
151 MadeChange |= doPeepholeSExtW(N);
152
153 // FIXME: This is here only because the VMerge transform doesn't
154 // know how to handle masked true inputs. Once that has been moved
155 // to post-ISEL, this can be deleted as well.
156 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
157 }
158
159 CurDAG->setRoot(Dummy.getValue());
160
161 MadeChange |= doPeepholeMergeVVMFold();
162
163 // After we're done with everything else, convert IMPLICIT_DEF
164 // passthru operands to NoRegister. This is required to workaround
165 // an optimization deficiency in MachineCSE. This really should
166 // be merged back into each of the patterns (i.e. there's no good
167 // reason not to go directly to NoReg), but is being done this way
168 // to allow easy backporting.
169 MadeChange |= doPeepholeNoRegPassThru();
170
171 if (MadeChange)
172 CurDAG->RemoveDeadNodes();
173 }
174
selectImmSeq(SelectionDAG * CurDAG,const SDLoc & DL,const MVT VT,RISCVMatInt::InstSeq & Seq)175 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
176 RISCVMatInt::InstSeq &Seq) {
177 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
178 for (const RISCVMatInt::Inst &Inst : Seq) {
179 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
180 SDNode *Result = nullptr;
181 switch (Inst.getOpndKind()) {
182 case RISCVMatInt::Imm:
183 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
184 break;
185 case RISCVMatInt::RegX0:
186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
187 CurDAG->getRegister(RISCV::X0, VT));
188 break;
189 case RISCVMatInt::RegReg:
190 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
191 break;
192 case RISCVMatInt::RegImm:
193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
194 break;
195 }
196
197 // Only the first instruction has X0 as its source.
198 SrcReg = SDValue(Result, 0);
199 }
200
201 return SrcReg;
202 }
203
selectImm(SelectionDAG * CurDAG,const SDLoc & DL,const MVT VT,int64_t Imm,const RISCVSubtarget & Subtarget)204 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
205 int64_t Imm, const RISCVSubtarget &Subtarget) {
206 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
207
208 // Use a rematerializable pseudo instruction for short sequences if enabled.
209 if (Seq.size() == 2 && UsePseudoMovImm)
210 return SDValue(
211 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
212 CurDAG->getTargetConstant(Imm, DL, VT)),
213 0);
214
215 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
216 // worst an LUI+ADDIW. This will require an extra register, but avoids a
217 // constant pool.
218 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
219 // low and high 32 bits are the same and bit 31 and 63 are set.
220 if (Seq.size() > 3) {
221 unsigned ShiftAmt, AddOpc;
222 RISCVMatInt::InstSeq SeqLo =
223 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
224 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
225 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
226
227 SDValue SLLI = SDValue(
228 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
229 CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
230 0);
231 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
232 }
233 }
234
235 // Otherwise, use the original sequence.
236 return selectImmSeq(CurDAG, DL, VT, Seq);
237 }
238
createTuple(SelectionDAG & CurDAG,ArrayRef<SDValue> Regs,unsigned NF,RISCVII::VLMUL LMUL)239 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
240 unsigned NF, RISCVII::VLMUL LMUL) {
241 static const unsigned M1TupleRegClassIDs[] = {
242 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
243 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
244 RISCV::VRN8M1RegClassID};
245 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
246 RISCV::VRN3M2RegClassID,
247 RISCV::VRN4M2RegClassID};
248
249 assert(Regs.size() >= 2 && Regs.size() <= 8);
250
251 unsigned RegClassID;
252 unsigned SubReg0;
253 switch (LMUL) {
254 default:
255 llvm_unreachable("Invalid LMUL.");
256 case RISCVII::VLMUL::LMUL_F8:
257 case RISCVII::VLMUL::LMUL_F4:
258 case RISCVII::VLMUL::LMUL_F2:
259 case RISCVII::VLMUL::LMUL_1:
260 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
261 "Unexpected subreg numbering");
262 SubReg0 = RISCV::sub_vrm1_0;
263 RegClassID = M1TupleRegClassIDs[NF - 2];
264 break;
265 case RISCVII::VLMUL::LMUL_2:
266 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
267 "Unexpected subreg numbering");
268 SubReg0 = RISCV::sub_vrm2_0;
269 RegClassID = M2TupleRegClassIDs[NF - 2];
270 break;
271 case RISCVII::VLMUL::LMUL_4:
272 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
273 "Unexpected subreg numbering");
274 SubReg0 = RISCV::sub_vrm4_0;
275 RegClassID = RISCV::VRN2M4RegClassID;
276 break;
277 }
278
279 SDLoc DL(Regs[0]);
280 SmallVector<SDValue, 8> Ops;
281
282 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
283
284 for (unsigned I = 0; I < Regs.size(); ++I) {
285 Ops.push_back(Regs[I]);
286 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
287 }
288 SDNode *N =
289 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
290 return SDValue(N, 0);
291 }
292
addVectorLoadStoreOperands(SDNode * Node,unsigned Log2SEW,const SDLoc & DL,unsigned CurOp,bool IsMasked,bool IsStridedOrIndexed,SmallVectorImpl<SDValue> & Operands,bool IsLoad,MVT * IndexVT)293 void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
294 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
295 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
296 bool IsLoad, MVT *IndexVT) {
297 SDValue Chain = Node->getOperand(0);
298 SDValue Glue;
299
300 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
301
302 if (IsStridedOrIndexed) {
303 Operands.push_back(Node->getOperand(CurOp++)); // Index.
304 if (IndexVT)
305 *IndexVT = Operands.back()->getSimpleValueType(0);
306 }
307
308 if (IsMasked) {
309 // Mask needs to be copied to V0.
310 SDValue Mask = Node->getOperand(CurOp++);
311 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
312 Glue = Chain.getValue(1);
313 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
314 }
315 SDValue VL;
316 selectVLOp(Node->getOperand(CurOp++), VL);
317 Operands.push_back(VL);
318
319 MVT XLenVT = Subtarget->getXLenVT();
320 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
321 Operands.push_back(SEWOp);
322
323 // At the IR layer, all the masked load intrinsics have policy operands,
324 // none of the others do. All have passthru operands. For our pseudos,
325 // all loads have policy operands.
326 if (IsLoad) {
327 uint64_t Policy = RISCVII::MASK_AGNOSTIC;
328 if (IsMasked)
329 Policy = Node->getConstantOperandVal(CurOp++);
330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
331 Operands.push_back(PolicyOp);
332 }
333
334 Operands.push_back(Chain); // Chain.
335 if (Glue)
336 Operands.push_back(Glue);
337 }
338
selectVLSEG(SDNode * Node,bool IsMasked,bool IsStrided)339 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
340 bool IsStrided) {
341 SDLoc DL(Node);
342 unsigned NF = Node->getNumValues() - 1;
343 MVT VT = Node->getSimpleValueType(0);
344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
345 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
346
347 unsigned CurOp = 2;
348 SmallVector<SDValue, 8> Operands;
349
350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
351 Node->op_begin() + CurOp + NF);
352 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
353 Operands.push_back(Merge);
354 CurOp += NF;
355
356 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
357 Operands, /*IsLoad=*/true);
358
359 const RISCV::VLSEGPseudo *P =
360 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
361 static_cast<unsigned>(LMUL));
362 MachineSDNode *Load =
363 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
364
365 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
367
368 SDValue SuperReg = SDValue(Load, 0);
369 for (unsigned I = 0; I < NF; ++I) {
370 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
371 ReplaceUses(SDValue(Node, I),
372 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
373 }
374
375 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
376 CurDAG->RemoveDeadNode(Node);
377 }
378
selectVLSEGFF(SDNode * Node,bool IsMasked)379 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
380 SDLoc DL(Node);
381 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
382 MVT VT = Node->getSimpleValueType(0);
383 MVT XLenVT = Subtarget->getXLenVT();
384 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
385 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
386
387 unsigned CurOp = 2;
388 SmallVector<SDValue, 7> Operands;
389
390 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
391 Node->op_begin() + CurOp + NF);
392 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
393 Operands.push_back(MaskedOff);
394 CurOp += NF;
395
396 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
397 /*IsStridedOrIndexed*/ false, Operands,
398 /*IsLoad=*/true);
399
400 const RISCV::VLSEGPseudo *P =
401 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
402 Log2SEW, static_cast<unsigned>(LMUL));
403 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
404 XLenVT, MVT::Other, Operands);
405
406 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
407 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
408
409 SDValue SuperReg = SDValue(Load, 0);
410 for (unsigned I = 0; I < NF; ++I) {
411 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
412 ReplaceUses(SDValue(Node, I),
413 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
414 }
415
416 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
417 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
418 CurDAG->RemoveDeadNode(Node);
419 }
420
selectVLXSEG(SDNode * Node,bool IsMasked,bool IsOrdered)421 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
422 bool IsOrdered) {
423 SDLoc DL(Node);
424 unsigned NF = Node->getNumValues() - 1;
425 MVT VT = Node->getSimpleValueType(0);
426 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
427 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
428
429 unsigned CurOp = 2;
430 SmallVector<SDValue, 8> Operands;
431
432 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
433 Node->op_begin() + CurOp + NF);
434 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
435 Operands.push_back(MaskedOff);
436 CurOp += NF;
437
438 MVT IndexVT;
439 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
440 /*IsStridedOrIndexed*/ true, Operands,
441 /*IsLoad=*/true, &IndexVT);
442
443 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
444 "Element count mismatch");
445
446 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
447 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
448 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
449 report_fatal_error("The V extension does not support EEW=64 for index "
450 "values when XLEN=32");
451 }
452 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
453 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
454 static_cast<unsigned>(IndexLMUL));
455 MachineSDNode *Load =
456 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
457
458 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
459 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
460
461 SDValue SuperReg = SDValue(Load, 0);
462 for (unsigned I = 0; I < NF; ++I) {
463 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
464 ReplaceUses(SDValue(Node, I),
465 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
466 }
467
468 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
469 CurDAG->RemoveDeadNode(Node);
470 }
471
selectVSSEG(SDNode * Node,bool IsMasked,bool IsStrided)472 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
473 bool IsStrided) {
474 SDLoc DL(Node);
475 unsigned NF = Node->getNumOperands() - 4;
476 if (IsStrided)
477 NF--;
478 if (IsMasked)
479 NF--;
480 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
482 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
483 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
484 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
485
486 SmallVector<SDValue, 8> Operands;
487 Operands.push_back(StoreVal);
488 unsigned CurOp = 2 + NF;
489
490 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
491 Operands);
492
493 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
494 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
495 MachineSDNode *Store =
496 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
497
498 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
499 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
500
501 ReplaceNode(Node, Store);
502 }
503
selectVSXSEG(SDNode * Node,bool IsMasked,bool IsOrdered)504 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
505 bool IsOrdered) {
506 SDLoc DL(Node);
507 unsigned NF = Node->getNumOperands() - 5;
508 if (IsMasked)
509 --NF;
510 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
511 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
512 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
513 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
514 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);
515
516 SmallVector<SDValue, 8> Operands;
517 Operands.push_back(StoreVal);
518 unsigned CurOp = 2 + NF;
519
520 MVT IndexVT;
521 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
522 /*IsStridedOrIndexed*/ true, Operands,
523 /*IsLoad=*/false, &IndexVT);
524
525 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
526 "Element count mismatch");
527
528 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
529 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
530 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
531 report_fatal_error("The V extension does not support EEW=64 for index "
532 "values when XLEN=32");
533 }
534 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
535 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
536 static_cast<unsigned>(IndexLMUL));
537 MachineSDNode *Store =
538 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);
539
540 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
541 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
542
543 ReplaceNode(Node, Store);
544 }
545
selectVSETVLI(SDNode * Node)546 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
547 if (!Subtarget->hasVInstructions())
548 return;
549
550 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
551
552 SDLoc DL(Node);
553 MVT XLenVT = Subtarget->getXLenVT();
554
555 unsigned IntNo = Node->getConstantOperandVal(0);
556
557 assert((IntNo == Intrinsic::riscv_vsetvli ||
558 IntNo == Intrinsic::riscv_vsetvlimax) &&
559 "Unexpected vsetvli intrinsic");
560
561 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
562 unsigned Offset = (VLMax ? 1 : 2);
563
564 assert(Node->getNumOperands() == Offset + 2 &&
565 "Unexpected number of operands");
566
567 unsigned SEW =
568 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
569 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
570 Node->getConstantOperandVal(Offset + 1) & 0x7);
571
572 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
573 /*MaskAgnostic*/ true);
574 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
575
576 SDValue VLOperand;
577 unsigned Opcode = RISCV::PseudoVSETVLI;
578 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
579 if (auto VLEN = Subtarget->getRealVLen())
580 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
581 VLMax = true;
582 }
583 if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
584 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
585 Opcode = RISCV::PseudoVSETVLIX0;
586 } else {
587 VLOperand = Node->getOperand(1);
588
589 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
590 uint64_t AVL = C->getZExtValue();
591 if (isUInt<5>(AVL)) {
592 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
593 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
594 XLenVT, VLImm, VTypeIOp));
595 return;
596 }
597 }
598 }
599
600 ReplaceNode(Node,
601 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
602 }
603
tryShrinkShlLogicImm(SDNode * Node)604 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
605 MVT VT = Node->getSimpleValueType(0);
606 unsigned Opcode = Node->getOpcode();
607 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
608 "Unexpected opcode");
609 SDLoc DL(Node);
610
611 // For operations of the form (x << C1) op C2, check if we can use
612 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
613 SDValue N0 = Node->getOperand(0);
614 SDValue N1 = Node->getOperand(1);
615
616 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
617 if (!Cst)
618 return false;
619
620 int64_t Val = Cst->getSExtValue();
621
622 // Check if immediate can already use ANDI/ORI/XORI.
623 if (isInt<12>(Val))
624 return false;
625
626 SDValue Shift = N0;
627
628 // If Val is simm32 and we have a sext_inreg from i32, then the binop
629 // produces at least 33 sign bits. We can peek through the sext_inreg and use
630 // a SLLIW at the end.
631 bool SignExt = false;
632 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
633 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
634 SignExt = true;
635 Shift = N0.getOperand(0);
636 }
637
638 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
639 return false;
640
641 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
642 if (!ShlCst)
643 return false;
644
645 uint64_t ShAmt = ShlCst->getZExtValue();
646
647 // Make sure that we don't change the operation by removing bits.
648 // This only matters for OR and XOR, AND is unaffected.
649 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
650 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
651 return false;
652
653 int64_t ShiftedVal = Val >> ShAmt;
654 if (!isInt<12>(ShiftedVal))
655 return false;
656
657 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
658 if (SignExt && ShAmt >= 32)
659 return false;
660
661 // Ok, we can reorder to get a smaller immediate.
662 unsigned BinOpc;
663 switch (Opcode) {
664 default: llvm_unreachable("Unexpected opcode");
665 case ISD::AND: BinOpc = RISCV::ANDI; break;
666 case ISD::OR: BinOpc = RISCV::ORI; break;
667 case ISD::XOR: BinOpc = RISCV::XORI; break;
668 }
669
670 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
671
672 SDNode *BinOp =
673 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
674 CurDAG->getTargetConstant(ShiftedVal, DL, VT));
675 SDNode *SLLI =
676 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
677 CurDAG->getTargetConstant(ShAmt, DL, VT));
678 ReplaceNode(Node, SLLI);
679 return true;
680 }
681
trySignedBitfieldExtract(SDNode * Node)682 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
683 // Only supported with XTHeadBb at the moment.
684 if (!Subtarget->hasVendorXTHeadBb())
685 return false;
686
687 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
688 if (!N1C)
689 return false;
690
691 SDValue N0 = Node->getOperand(0);
692 if (!N0.hasOneUse())
693 return false;
694
695 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
696 MVT VT) {
697 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
698 CurDAG->getTargetConstant(Msb, DL, VT),
699 CurDAG->getTargetConstant(Lsb, DL, VT));
700 };
701
702 SDLoc DL(Node);
703 MVT VT = Node->getSimpleValueType(0);
704 const unsigned RightShAmt = N1C->getZExtValue();
705
706 // Transform (sra (shl X, C1) C2) with C1 < C2
707 // -> (TH.EXT X, msb, lsb)
708 if (N0.getOpcode() == ISD::SHL) {
709 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
710 if (!N01C)
711 return false;
712
713 const unsigned LeftShAmt = N01C->getZExtValue();
714 // Make sure that this is a bitfield extraction (i.e., the shift-right
715 // amount can not be less than the left-shift).
716 if (LeftShAmt > RightShAmt)
717 return false;
718
719 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
720 const unsigned Msb = MsbPlusOne - 1;
721 const unsigned Lsb = RightShAmt - LeftShAmt;
722
723 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
724 ReplaceNode(Node, TH_EXT);
725 return true;
726 }
727
728 // Transform (sra (sext_inreg X, _), C) ->
729 // (TH.EXT X, msb, lsb)
730 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
731 unsigned ExtSize =
732 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
733
734 // ExtSize of 32 should use sraiw via tablegen pattern.
735 if (ExtSize == 32)
736 return false;
737
738 const unsigned Msb = ExtSize - 1;
739 const unsigned Lsb = RightShAmt;
740
741 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
742 ReplaceNode(Node, TH_EXT);
743 return true;
744 }
745
746 return false;
747 }
748
tryIndexedLoad(SDNode * Node)749 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
750 // Target does not support indexed loads.
751 if (!Subtarget->hasVendorXTHeadMemIdx())
752 return false;
753
754 LoadSDNode *Ld = cast<LoadSDNode>(Node);
755 ISD::MemIndexedMode AM = Ld->getAddressingMode();
756 if (AM == ISD::UNINDEXED)
757 return false;
758
759 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
760 if (!C)
761 return false;
762
763 EVT LoadVT = Ld->getMemoryVT();
764 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) &&
765 "Unexpected addressing mode");
766 bool IsPre = AM == ISD::PRE_INC;
767 bool IsPost = AM == ISD::POST_INC;
768 int64_t Offset = C->getSExtValue();
769
770 // The constants that can be encoded in the THeadMemIdx instructions
771 // are of the form (sign_extend(imm5) << imm2).
772 int64_t Shift;
773 for (Shift = 0; Shift < 4; Shift++)
774 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
775 break;
776
777 // Constant cannot be encoded.
778 if (Shift == 4)
779 return false;
780
781 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
782 unsigned Opcode;
783 if (LoadVT == MVT::i8 && IsPre)
784 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
785 else if (LoadVT == MVT::i8 && IsPost)
786 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
787 else if (LoadVT == MVT::i16 && IsPre)
788 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
789 else if (LoadVT == MVT::i16 && IsPost)
790 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
791 else if (LoadVT == MVT::i32 && IsPre)
792 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
793 else if (LoadVT == MVT::i32 && IsPost)
794 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
795 else if (LoadVT == MVT::i64 && IsPre)
796 Opcode = RISCV::TH_LDIB;
797 else if (LoadVT == MVT::i64 && IsPost)
798 Opcode = RISCV::TH_LDIA;
799 else
800 return false;
801
802 EVT Ty = Ld->getOffset().getValueType();
803 SDValue Ops[] = {Ld->getBasePtr(),
804 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
805 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
806 Ld->getChain()};
807 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
808 Ld->getValueType(1), MVT::Other, Ops);
809
810 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
811 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
812
813 ReplaceNode(Node, New);
814
815 return true;
816 }
817
selectSF_VC_X_SE(SDNode * Node)818 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) {
819 if (!Subtarget->hasVInstructions())
820 return;
821
822 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode");
823
824 SDLoc DL(Node);
825 unsigned IntNo = Node->getConstantOperandVal(1);
826
827 assert((IntNo == Intrinsic::riscv_sf_vc_x_se ||
828 IntNo == Intrinsic::riscv_sf_vc_i_se) &&
829 "Unexpected vsetvli intrinsic");
830
831 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl
832 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6));
833 SDValue SEWOp =
834 CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT());
835 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3),
836 Node->getOperand(4), Node->getOperand(5),
837 Node->getOperand(8), SEWOp,
838 Node->getOperand(0)};
839
840 unsigned Opcode;
841 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7));
842 switch (LMulSDNode->getSExtValue()) {
843 case 5:
844 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF8
845 : RISCV::PseudoVC_I_SE_MF8;
846 break;
847 case 6:
848 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF4
849 : RISCV::PseudoVC_I_SE_MF4;
850 break;
851 case 7:
852 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_MF2
853 : RISCV::PseudoVC_I_SE_MF2;
854 break;
855 case 0:
856 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M1
857 : RISCV::PseudoVC_I_SE_M1;
858 break;
859 case 1:
860 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M2
861 : RISCV::PseudoVC_I_SE_M2;
862 break;
863 case 2:
864 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M4
865 : RISCV::PseudoVC_I_SE_M4;
866 break;
867 case 3:
868 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoVC_X_SE_M8
869 : RISCV::PseudoVC_I_SE_M8;
870 break;
871 }
872
873 ReplaceNode(Node, CurDAG->getMachineNode(
874 Opcode, DL, Node->getSimpleValueType(0), Operands));
875 }
876
Select(SDNode * Node)877 void RISCVDAGToDAGISel::Select(SDNode *Node) {
878 // If we have a custom node, we have already selected.
879 if (Node->isMachineOpcode()) {
880 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
881 Node->setNodeId(-1);
882 return;
883 }
884
885 // Instruction Selection not handled by the auto-generated tablegen selection
886 // should be handled here.
887 unsigned Opcode = Node->getOpcode();
888 MVT XLenVT = Subtarget->getXLenVT();
889 SDLoc DL(Node);
890 MVT VT = Node->getSimpleValueType(0);
891
892 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
893
894 switch (Opcode) {
895 case ISD::Constant: {
896 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
897 auto *ConstNode = cast<ConstantSDNode>(Node);
898 if (ConstNode->isZero()) {
899 SDValue New =
900 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
901 ReplaceNode(Node, New.getNode());
902 return;
903 }
904 int64_t Imm = ConstNode->getSExtValue();
905 // If only the lower 8 bits are used, try to convert this to a simm6 by
906 // sign-extending bit 7. This is neutral without the C extension, and
907 // allows C.LI to be used if C is present.
908 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node))
909 Imm = SignExtend64<8>(Imm);
910 // If the upper XLen-16 bits are not used, try to convert this to a simm12
911 // by sign extending bit 15.
912 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
913 hasAllHUsers(Node))
914 Imm = SignExtend64<16>(Imm);
915 // If the upper 32-bits are not used try to convert this into a simm32 by
916 // sign extending bit 32.
917 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
918 Imm = SignExtend64<32>(Imm);
919
920 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
921 return;
922 }
923 case ISD::ConstantFP: {
924 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
925 auto [FPImm, NeedsFNeg] =
926 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
927 VT);
928 if (FPImm >= 0) {
929 unsigned Opc;
930 unsigned FNegOpc;
931 switch (VT.SimpleTy) {
932 default:
933 llvm_unreachable("Unexpected size");
934 case MVT::f16:
935 Opc = RISCV::FLI_H;
936 FNegOpc = RISCV::FSGNJN_H;
937 break;
938 case MVT::f32:
939 Opc = RISCV::FLI_S;
940 FNegOpc = RISCV::FSGNJN_S;
941 break;
942 case MVT::f64:
943 Opc = RISCV::FLI_D;
944 FNegOpc = RISCV::FSGNJN_D;
945 break;
946 }
947 SDNode *Res = CurDAG->getMachineNode(
948 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
949 if (NeedsFNeg)
950 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
951 SDValue(Res, 0));
952
953 ReplaceNode(Node, Res);
954 return;
955 }
956
957 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
958 SDValue Imm;
959 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
960 // create an integer immediate.
961 if (APF.isPosZero() || NegZeroF64)
962 Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
963 else
964 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
965 *Subtarget);
966
967 bool HasZdinx = Subtarget->hasStdExtZdinx();
968 bool Is64Bit = Subtarget->is64Bit();
969 unsigned Opc;
970 switch (VT.SimpleTy) {
971 default:
972 llvm_unreachable("Unexpected size");
973 case MVT::bf16:
974 assert(Subtarget->hasStdExtZfbfmin());
975 Opc = RISCV::FMV_H_X;
976 break;
977 case MVT::f16:
978 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
979 break;
980 case MVT::f32:
981 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
982 break;
983 case MVT::f64:
984 // For RV32, we can't move from a GPR, we need to convert instead. This
985 // should only happen for +0.0 and -0.0.
986 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
987 if (Is64Bit)
988 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
989 else
990 Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
991 break;
992 }
993
994 SDNode *Res;
995 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
996 Res = CurDAG->getMachineNode(
997 Opc, DL, VT, Imm,
998 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
999 else
1000 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
1001
1002 // For f64 -0.0, we need to insert a fneg.d idiom.
1003 if (NegZeroF64) {
1004 Opc = RISCV::FSGNJN_D;
1005 if (HasZdinx)
1006 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
1007 Res =
1008 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
1009 }
1010
1011 ReplaceNode(Node, Res);
1012 return;
1013 }
1014 case RISCVISD::BuildPairF64: {
1015 if (!Subtarget->hasStdExtZdinx())
1016 break;
1017
1018 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1019
1020 SDValue Ops[] = {
1021 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32),
1022 Node->getOperand(0),
1023 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32),
1024 Node->getOperand(1),
1025 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)};
1026
1027 SDNode *N =
1028 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::f64, Ops);
1029 ReplaceNode(Node, N);
1030 return;
1031 }
1032 case RISCVISD::SplitF64: {
1033 if (Subtarget->hasStdExtZdinx()) {
1034 assert(!Subtarget->is64Bit() && "Unexpected subtarget");
1035
1036 if (!SDValue(Node, 0).use_empty()) {
1037 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, VT,
1038 Node->getOperand(0));
1039 ReplaceUses(SDValue(Node, 0), Lo);
1040 }
1041
1042 if (!SDValue(Node, 1).use_empty()) {
1043 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, VT,
1044 Node->getOperand(0));
1045 ReplaceUses(SDValue(Node, 1), Hi);
1046 }
1047
1048 CurDAG->RemoveDeadNode(Node);
1049 return;
1050 }
1051
1052 if (!Subtarget->hasStdExtZfa())
1053 break;
1054 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
1055 "Unexpected subtarget");
1056
1057 // With Zfa, lower to fmv.x.w and fmvh.x.d.
1058 if (!SDValue(Node, 0).use_empty()) {
1059 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
1060 Node->getOperand(0));
1061 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
1062 }
1063 if (!SDValue(Node, 1).use_empty()) {
1064 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
1065 Node->getOperand(0));
1066 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
1067 }
1068
1069 CurDAG->RemoveDeadNode(Node);
1070 return;
1071 }
1072 case ISD::SHL: {
1073 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1074 if (!N1C)
1075 break;
1076 SDValue N0 = Node->getOperand(0);
1077 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
1078 !isa<ConstantSDNode>(N0.getOperand(1)))
1079 break;
1080 unsigned ShAmt = N1C->getZExtValue();
1081 uint64_t Mask = N0.getConstantOperandVal(1);
1082
1083 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
1084 // 32 leading zeros and C3 trailing zeros.
1085 if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
1086 unsigned XLen = Subtarget->getXLen();
1087 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1088 unsigned TrailingZeros = llvm::countr_zero(Mask);
1089 if (TrailingZeros > 0 && LeadingZeros == 32) {
1090 SDNode *SRLIW = CurDAG->getMachineNode(
1091 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1092 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1093 SDNode *SLLI = CurDAG->getMachineNode(
1094 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1095 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
1096 ReplaceNode(Node, SLLI);
1097 return;
1098 }
1099 }
1100 break;
1101 }
1102 case ISD::SRL: {
1103 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1104 if (!N1C)
1105 break;
1106 SDValue N0 = Node->getOperand(0);
1107 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1108 break;
1109 unsigned ShAmt = N1C->getZExtValue();
1110 uint64_t Mask = N0.getConstantOperandVal(1);
1111
1112 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
1113 // 32 leading zeros and C3 trailing zeros.
1114 if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
1115 unsigned XLen = Subtarget->getXLen();
1116 unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
1117 unsigned TrailingZeros = llvm::countr_zero(Mask);
1118 if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
1119 SDNode *SRLIW = CurDAG->getMachineNode(
1120 RISCV::SRLIW, DL, VT, N0->getOperand(0),
1121 CurDAG->getTargetConstant(TrailingZeros, DL, VT));
1122 SDNode *SLLI = CurDAG->getMachineNode(
1123 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1124 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
1125 ReplaceNode(Node, SLLI);
1126 return;
1127 }
1128 }
1129
1130 // Optimize (srl (and X, C2), C) ->
1131 // (srli (slli X, (XLen-C3), (XLen-C3) + C)
1132 // Where C2 is a mask with C3 trailing ones.
1133 // Taking into account that the C2 may have had lower bits unset by
1134 // SimplifyDemandedBits. This avoids materializing the C2 immediate.
1135 // This pattern occurs when type legalizing right shifts for types with
1136 // less than XLen bits.
1137 Mask |= maskTrailingOnes<uint64_t>(ShAmt);
1138 if (!isMask_64(Mask))
1139 break;
1140 unsigned TrailingOnes = llvm::countr_one(Mask);
1141 if (ShAmt >= TrailingOnes)
1142 break;
1143 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
1144 if (TrailingOnes == 32) {
1145 SDNode *SRLI = CurDAG->getMachineNode(
1146 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
1147 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1148 ReplaceNode(Node, SRLI);
1149 return;
1150 }
1151
1152 // Only do the remaining transforms if the AND has one use.
1153 if (!N0.hasOneUse())
1154 break;
1155
1156 // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
1157 if (HasBitTest && ShAmt + 1 == TrailingOnes) {
1158 SDNode *BEXTI = CurDAG->getMachineNode(
1159 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
1160 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
1161 ReplaceNode(Node, BEXTI);
1162 return;
1163 }
1164
1165 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
1166 SDNode *SLLI =
1167 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1168 CurDAG->getTargetConstant(LShAmt, DL, VT));
1169 SDNode *SRLI = CurDAG->getMachineNode(
1170 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1171 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1172 ReplaceNode(Node, SRLI);
1173 return;
1174 }
1175 case ISD::SRA: {
1176 if (trySignedBitfieldExtract(Node))
1177 return;
1178
1179 // Optimize (sra (sext_inreg X, i16), C) ->
1180 // (srai (slli X, (XLen-16), (XLen-16) + C)
1181 // And (sra (sext_inreg X, i8), C) ->
1182 // (srai (slli X, (XLen-8), (XLen-8) + C)
1183 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
1184 // This transform matches the code we get without Zbb. The shifts are more
1185 // compressible, and this can help expose CSE opportunities in the sdiv by
1186 // constant optimization.
1187 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1188 if (!N1C)
1189 break;
1190 SDValue N0 = Node->getOperand(0);
1191 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
1192 break;
1193 unsigned ShAmt = N1C->getZExtValue();
1194 unsigned ExtSize =
1195 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
1196 // ExtSize of 32 should use sraiw via tablegen pattern.
1197 if (ExtSize >= 32 || ShAmt >= ExtSize)
1198 break;
1199 unsigned LShAmt = Subtarget->getXLen() - ExtSize;
1200 SDNode *SLLI =
1201 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
1202 CurDAG->getTargetConstant(LShAmt, DL, VT));
1203 SDNode *SRAI = CurDAG->getMachineNode(
1204 RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
1205 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
1206 ReplaceNode(Node, SRAI);
1207 return;
1208 }
1209 case ISD::OR:
1210 case ISD::XOR:
1211 if (tryShrinkShlLogicImm(Node))
1212 return;
1213
1214 break;
1215 case ISD::AND: {
1216 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1217 if (!N1C)
1218 break;
1219 uint64_t C1 = N1C->getZExtValue();
1220 const bool isC1Mask = isMask_64(C1);
1221 const bool isC1ANDI = isInt<12>(C1);
1222
1223 SDValue N0 = Node->getOperand(0);
1224
1225 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
1226 SDValue X, unsigned Msb,
1227 unsigned Lsb) {
1228 if (!Subtarget->hasVendorXTHeadBb())
1229 return false;
1230
1231 SDNode *TH_EXTU = CurDAG->getMachineNode(
1232 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
1233 CurDAG->getTargetConstant(Lsb, DL, VT));
1234 ReplaceNode(Node, TH_EXTU);
1235 return true;
1236 };
1237
1238 bool LeftShift = N0.getOpcode() == ISD::SHL;
1239 if (LeftShift || N0.getOpcode() == ISD::SRL) {
1240 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1241 if (!C)
1242 break;
1243 unsigned C2 = C->getZExtValue();
1244 unsigned XLen = Subtarget->getXLen();
1245 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
1246
1247 // Keep track of whether this is a c.andi. If we can't use c.andi, the
1248 // shift pair might offer more compression opportunities.
1249 // TODO: We could check for C extension here, but we don't have many lit
1250 // tests with the C extension enabled so not checking gets better
1251 // coverage.
1252 // TODO: What if ANDI faster than shift?
1253 bool IsCANDI = isInt<6>(N1C->getSExtValue());
1254
1255 // Clear irrelevant bits in the mask.
1256 if (LeftShift)
1257 C1 &= maskTrailingZeros<uint64_t>(C2);
1258 else
1259 C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
1260
1261 // Some transforms should only be done if the shift has a single use or
1262 // the AND would become (srli (slli X, 32), 32)
1263 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
1264
1265 SDValue X = N0.getOperand(0);
1266
1267 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
1268 // with c3 leading zeros.
1269 if (!LeftShift && isC1Mask) {
1270 unsigned Leading = XLen - llvm::bit_width(C1);
1271 if (C2 < Leading) {
1272 // If the number of leading zeros is C2+32 this can be SRLIW.
1273 if (C2 + 32 == Leading) {
1274 SDNode *SRLIW = CurDAG->getMachineNode(
1275 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
1276 ReplaceNode(Node, SRLIW);
1277 return;
1278 }
1279
1280 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
1281 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
1282 //
1283 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
1284 // legalized and goes through DAG combine.
1285 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
1286 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1287 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
1288 SDNode *SRAIW =
1289 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
1290 CurDAG->getTargetConstant(31, DL, VT));
1291 SDNode *SRLIW = CurDAG->getMachineNode(
1292 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
1293 CurDAG->getTargetConstant(Leading - 32, DL, VT));
1294 ReplaceNode(Node, SRLIW);
1295 return;
1296 }
1297
1298 // Try to use an unsigned bitfield extract (e.g., th.extu) if
1299 // available.
1300 // Transform (and (srl x, C2), C1)
1301 // -> (<bfextract> x, msb, lsb)
1302 //
1303 // Make sure to keep this below the SRLIW cases, as we always want to
1304 // prefer the more common instruction.
1305 const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
1306 const unsigned Lsb = C2;
1307 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
1308 return;
1309
1310 // (srli (slli x, c3-c2), c3).
1311 // Skip if we could use (zext.w (sraiw X, C2)).
1312 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
1313 X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
1314 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
1315 // Also Skip if we can use bexti or th.tst.
1316 Skip |= HasBitTest && Leading == XLen - 1;
1317 if (OneUseOrZExtW && !Skip) {
1318 SDNode *SLLI = CurDAG->getMachineNode(
1319 RISCV::SLLI, DL, VT, X,
1320 CurDAG->getTargetConstant(Leading - C2, DL, VT));
1321 SDNode *SRLI = CurDAG->getMachineNode(
1322 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1323 CurDAG->getTargetConstant(Leading, DL, VT));
1324 ReplaceNode(Node, SRLI);
1325 return;
1326 }
1327 }
1328 }
1329
1330 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
1331 // shifted by c2 bits with c3 leading zeros.
1332 if (LeftShift && isShiftedMask_64(C1)) {
1333 unsigned Leading = XLen - llvm::bit_width(C1);
1334
1335 if (C2 + Leading < XLen &&
1336 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
1337 // Use slli.uw when possible.
1338 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
1339 SDNode *SLLI_UW =
1340 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
1341 CurDAG->getTargetConstant(C2, DL, VT));
1342 ReplaceNode(Node, SLLI_UW);
1343 return;
1344 }
1345
1346 // (srli (slli c2+c3), c3)
1347 if (OneUseOrZExtW && !IsCANDI) {
1348 SDNode *SLLI = CurDAG->getMachineNode(
1349 RISCV::SLLI, DL, VT, X,
1350 CurDAG->getTargetConstant(C2 + Leading, DL, VT));
1351 SDNode *SRLI = CurDAG->getMachineNode(
1352 RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
1353 CurDAG->getTargetConstant(Leading, DL, VT));
1354 ReplaceNode(Node, SRLI);
1355 return;
1356 }
1357 }
1358 }
1359
1360 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
1361 // shifted mask with c2 leading zeros and c3 trailing zeros.
1362 if (!LeftShift && isShiftedMask_64(C1)) {
1363 unsigned Leading = XLen - llvm::bit_width(C1);
1364 unsigned Trailing = llvm::countr_zero(C1);
1365 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
1366 !IsCANDI) {
1367 unsigned SrliOpc = RISCV::SRLI;
1368 // If the input is zexti32 we should use SRLIW.
1369 if (X.getOpcode() == ISD::AND &&
1370 isa<ConstantSDNode>(X.getOperand(1)) &&
1371 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
1372 SrliOpc = RISCV::SRLIW;
1373 X = X.getOperand(0);
1374 }
1375 SDNode *SRLI = CurDAG->getMachineNode(
1376 SrliOpc, DL, VT, X,
1377 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1378 SDNode *SLLI = CurDAG->getMachineNode(
1379 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1380 CurDAG->getTargetConstant(Trailing, DL, VT));
1381 ReplaceNode(Node, SLLI);
1382 return;
1383 }
1384 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
1385 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
1386 OneUseOrZExtW && !IsCANDI) {
1387 SDNode *SRLIW = CurDAG->getMachineNode(
1388 RISCV::SRLIW, DL, VT, X,
1389 CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
1390 SDNode *SLLI = CurDAG->getMachineNode(
1391 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1392 CurDAG->getTargetConstant(Trailing, DL, VT));
1393 ReplaceNode(Node, SLLI);
1394 return;
1395 }
1396 }
1397
1398 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
1399 // shifted mask with no leading zeros and c3 trailing zeros.
1400 if (LeftShift && isShiftedMask_64(C1)) {
1401 unsigned Leading = XLen - llvm::bit_width(C1);
1402 unsigned Trailing = llvm::countr_zero(C1);
1403 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
1404 SDNode *SRLI = CurDAG->getMachineNode(
1405 RISCV::SRLI, DL, VT, X,
1406 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1407 SDNode *SLLI = CurDAG->getMachineNode(
1408 RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
1409 CurDAG->getTargetConstant(Trailing, DL, VT));
1410 ReplaceNode(Node, SLLI);
1411 return;
1412 }
1413 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
1414 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
1415 SDNode *SRLIW = CurDAG->getMachineNode(
1416 RISCV::SRLIW, DL, VT, X,
1417 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1418 SDNode *SLLI = CurDAG->getMachineNode(
1419 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
1420 CurDAG->getTargetConstant(Trailing, DL, VT));
1421 ReplaceNode(Node, SLLI);
1422 return;
1423 }
1424
1425 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI.
1426 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW &&
1427 Subtarget->hasStdExtZba()) {
1428 SDNode *SRLI = CurDAG->getMachineNode(
1429 RISCV::SRLI, DL, VT, X,
1430 CurDAG->getTargetConstant(Trailing - C2, DL, VT));
1431 SDNode *SLLI_UW = CurDAG->getMachineNode(
1432 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0),
1433 CurDAG->getTargetConstant(Trailing, DL, VT));
1434 ReplaceNode(Node, SLLI_UW);
1435 return;
1436 }
1437 }
1438 }
1439
1440 // If C1 masks off the upper bits only (but can't be formed as an
1441 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
1442 // available.
1443 // Transform (and x, C1)
1444 // -> (<bfextract> x, msb, lsb)
1445 if (isC1Mask && !isC1ANDI) {
1446 const unsigned Msb = llvm::bit_width(C1) - 1;
1447 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
1448 return;
1449 }
1450
1451 if (tryShrinkShlLogicImm(Node))
1452 return;
1453
1454 break;
1455 }
1456 case ISD::MUL: {
1457 // Special case for calculating (mul (and X, C2), C1) where the full product
1458 // fits in XLen bits. We can shift X left by the number of leading zeros in
1459 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
1460 // product has XLen trailing zeros, putting it in the output of MULHU. This
1461 // can avoid materializing a constant in a register for C2.
1462
1463 // RHS should be a constant.
1464 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
1465 if (!N1C || !N1C->hasOneUse())
1466 break;
1467
1468 // LHS should be an AND with constant.
1469 SDValue N0 = Node->getOperand(0);
1470 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
1471 break;
1472
1473 uint64_t C2 = N0.getConstantOperandVal(1);
1474
1475 // Constant should be a mask.
1476 if (!isMask_64(C2))
1477 break;
1478
1479 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
1480 // multiple users or the constant is a simm12. This prevents inserting a
1481 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
1482 // make it more costly to materialize. Otherwise, using a SLLI might allow
1483 // it to be compressed.
1484 bool IsANDIOrZExt =
1485 isInt<12>(C2) ||
1486 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
1487 // With XTHeadBb, we can use TH.EXTU.
1488 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
1489 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
1490 break;
1491 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
1492 // the constant is a simm32.
1493 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
1494 // With XTHeadBb, we can use TH.EXTU.
1495 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
1496 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
1497 break;
1498
1499 // We need to shift left the AND input and C1 by a total of XLen bits.
1500
1501 // How far left do we need to shift the AND input?
1502 unsigned XLen = Subtarget->getXLen();
1503 unsigned LeadingZeros = XLen - llvm::bit_width(C2);
1504
1505 // The constant gets shifted by the remaining amount unless that would
1506 // shift bits out.
1507 uint64_t C1 = N1C->getZExtValue();
1508 unsigned ConstantShift = XLen - LeadingZeros;
1509 if (ConstantShift > (XLen - llvm::bit_width(C1)))
1510 break;
1511
1512 uint64_t ShiftedC1 = C1 << ConstantShift;
1513 // If this RV32, we need to sign extend the constant.
1514 if (XLen == 32)
1515 ShiftedC1 = SignExtend64<32>(ShiftedC1);
1516
1517 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
1518 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
1519 SDNode *SLLI =
1520 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
1521 CurDAG->getTargetConstant(LeadingZeros, DL, VT));
1522 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
1523 SDValue(SLLI, 0), SDValue(Imm, 0));
1524 ReplaceNode(Node, MULHU);
1525 return;
1526 }
1527 case ISD::LOAD: {
1528 if (tryIndexedLoad(Node))
1529 return;
1530
1531 if (Subtarget->hasVendorXCVmem()) {
1532 // We match post-incrementing load here
1533 LoadSDNode *Load = cast<LoadSDNode>(Node);
1534 if (Load->getAddressingMode() != ISD::POST_INC)
1535 break;
1536
1537 SDValue Chain = Node->getOperand(0);
1538 SDValue Base = Node->getOperand(1);
1539 SDValue Offset = Node->getOperand(2);
1540
1541 bool Simm12 = false;
1542 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD;
1543
1544 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) {
1545 int ConstantVal = ConstantOffset->getSExtValue();
1546 Simm12 = isInt<12>(ConstantVal);
1547 if (Simm12)
1548 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset),
1549 Offset.getValueType());
1550 }
1551
1552 unsigned Opcode = 0;
1553 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) {
1554 case MVT::i8:
1555 if (Simm12 && SignExtend)
1556 Opcode = RISCV::CV_LB_ri_inc;
1557 else if (Simm12 && !SignExtend)
1558 Opcode = RISCV::CV_LBU_ri_inc;
1559 else if (!Simm12 && SignExtend)
1560 Opcode = RISCV::CV_LB_rr_inc;
1561 else
1562 Opcode = RISCV::CV_LBU_rr_inc;
1563 break;
1564 case MVT::i16:
1565 if (Simm12 && SignExtend)
1566 Opcode = RISCV::CV_LH_ri_inc;
1567 else if (Simm12 && !SignExtend)
1568 Opcode = RISCV::CV_LHU_ri_inc;
1569 else if (!Simm12 && SignExtend)
1570 Opcode = RISCV::CV_LH_rr_inc;
1571 else
1572 Opcode = RISCV::CV_LHU_rr_inc;
1573 break;
1574 case MVT::i32:
1575 if (Simm12)
1576 Opcode = RISCV::CV_LW_ri_inc;
1577 else
1578 Opcode = RISCV::CV_LW_rr_inc;
1579 break;
1580 default:
1581 break;
1582 }
1583 if (!Opcode)
1584 break;
1585
1586 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT,
1587 Chain.getSimpleValueType(), Base,
1588 Offset, Chain));
1589 return;
1590 }
1591 break;
1592 }
1593 case ISD::INTRINSIC_WO_CHAIN: {
1594 unsigned IntNo = Node->getConstantOperandVal(0);
1595 switch (IntNo) {
1596 // By default we do not custom select any intrinsic.
1597 default:
1598 break;
1599 case Intrinsic::riscv_vmsgeu:
1600 case Intrinsic::riscv_vmsge: {
1601 SDValue Src1 = Node->getOperand(1);
1602 SDValue Src2 = Node->getOperand(2);
1603 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
1604 bool IsCmpUnsignedZero = false;
1605 // Only custom select scalar second operand.
1606 if (Src2.getValueType() != XLenVT)
1607 break;
1608 // Small constants are handled with patterns.
1609 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1610 int64_t CVal = C->getSExtValue();
1611 if (CVal >= -15 && CVal <= 16) {
1612 if (!IsUnsigned || CVal != 0)
1613 break;
1614 IsCmpUnsignedZero = true;
1615 }
1616 }
1617 MVT Src1VT = Src1.getSimpleValueType();
1618 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
1619 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1620 default:
1621 llvm_unreachable("Unexpected LMUL!");
1622 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
1623 case RISCVII::VLMUL::lmulenum: \
1624 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1625 : RISCV::PseudoVMSLT_VX_##suffix; \
1626 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \
1627 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
1628 break;
1629 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
1630 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
1631 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
1632 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
1633 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
1634 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
1635 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
1636 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES
1637 }
1638 SDValue SEW = CurDAG->getTargetConstant(
1639 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1640 SDValue VL;
1641 selectVLOp(Node->getOperand(3), VL);
1642
1643 // If vmsgeu with 0 immediate, expand it to vmset.
1644 if (IsCmpUnsignedZero) {
1645 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
1646 return;
1647 }
1648
1649 // Expand to
1650 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
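// vmsge{u}.vx has no native encoding; a >= x is computed as !(a < x), and
// vmnand'ing the compare result with itself performs the negation.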
1651 SDValue Cmp = SDValue(
1652 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1653 0);
1654 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
1655 {Cmp, Cmp, VL, SEW}));
1656 return;
1657 }
1658 case Intrinsic::riscv_vmsgeu_mask:
1659 case Intrinsic::riscv_vmsge_mask: {
1660 SDValue Src1 = Node->getOperand(2);
1661 SDValue Src2 = Node->getOperand(3);
1662 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
1663 bool IsCmpUnsignedZero = false;
1664 // Only custom select scalar second operand.
1665 if (Src2.getValueType() != XLenVT)
1666 break;
1667 // Small constants are handled with patterns.
1668 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
1669 int64_t CVal = C->getSExtValue();
1670 if (CVal >= -15 && CVal <= 16) {
1671 if (!IsUnsigned || CVal != 0)
1672 break;
1673 IsCmpUnsignedZero = true;
1674 }
1675 }
1676 MVT Src1VT = Src1.getSimpleValueType();
1677 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
1678 VMOROpcode;
1679 switch (RISCVTargetLowering::getLMUL(Src1VT)) {
1680 default:
1681 llvm_unreachable("Unexpected LMUL!");
1682 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
1683 case RISCVII::VLMUL::lmulenum: \
1684 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
1685 : RISCV::PseudoVMSLT_VX_##suffix; \
1686 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
1687 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
1688 break;
1689 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
1690 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
1691 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
1692 CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
1693 CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
1694 CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
1695 CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
1696 #undef CASE_VMSLT_OPCODES
1697 }
1698 // Mask operations use the LMUL from the mask type.
1699 switch (RISCVTargetLowering::getLMUL(VT)) {
1700 default:
1701 llvm_unreachable("Unexpected LMUL!");
1702 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
1703 case RISCVII::VLMUL::lmulenum: \
1704 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
1705 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
1706 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
1707 break;
1708 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
1709 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
1710 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
1711 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
1712 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
1713 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
1714 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
1715 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES
1716 }
1717 SDValue SEW = CurDAG->getTargetConstant(
1718 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
1719 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
1720 SDValue VL;
1721 selectVLOp(Node->getOperand(5), VL);
1722 SDValue MaskedOff = Node->getOperand(1);
1723 SDValue Mask = Node->getOperand(4);
1724
1725 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
1726 if (IsCmpUnsignedZero) {
1727 // We don't need vmor if the MaskedOff and the Mask are the same
1728 // value.
1729 if (Mask == MaskedOff) {
1730 ReplaceUses(Node, Mask.getNode());
1731 return;
1732 }
1733 ReplaceNode(Node,
1734 CurDAG->getMachineNode(VMOROpcode, DL, VT,
1735 {Mask, MaskedOff, VL, MaskSEW}));
1736 return;
1737 }
1738
1739 // If the MaskedOff value and the Mask are the same value use
1740 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
1741 // This avoids needing to copy v0 to vd before starting the next sequence.
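// vmandn.mm vd, v0, vt computes v0 & ~vt, so active lanes get !(a < x) while
// inactive lanes get 0, matching the MaskedOff (== Mask) value.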
1742 if (Mask == MaskedOff) {
1743 SDValue Cmp = SDValue(
1744 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
1745 0);
1746 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
1747 {Mask, Cmp, VL, MaskSEW}));
1748 return;
1749 }
1750
1751 // Mask needs to be copied to V0.
1752 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1753 RISCV::V0, Mask, SDValue());
1754 SDValue Glue = Chain.getValue(1);
1755 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
1756
1757 // Otherwise use
1758 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
1759 // The result is mask undisturbed.
1760 // We use the same instructions to emulate mask agnostic behavior, because
1761 // the agnostic result can be either undisturbed or all 1.
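// XORing with v0 flips only the active lanes (where v0 is 1), turning
// (a < x) into (a >= x); inactive lanes keep the MaskedOff value.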
1762 SDValue Cmp = SDValue(
1763 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
1764 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
1765 0);
1766 // vmxor.mm vd, vd, v0 is used to update active value.
1767 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
1768 {Cmp, Mask, VL, MaskSEW}));
1769 return;
1770 }
1771 case Intrinsic::riscv_vsetvli:
1772 case Intrinsic::riscv_vsetvlimax:
1773 return selectVSETVLI(Node);
1774 }
1775 break;
1776 }
1777 case ISD::INTRINSIC_W_CHAIN: {
1778 unsigned IntNo = Node->getConstantOperandVal(1);
1779 switch (IntNo) {
1780 // By default we do not custom select any intrinsic.
1781 default:
1782 break;
1783 case Intrinsic::riscv_vlseg2:
1784 case Intrinsic::riscv_vlseg3:
1785 case Intrinsic::riscv_vlseg4:
1786 case Intrinsic::riscv_vlseg5:
1787 case Intrinsic::riscv_vlseg6:
1788 case Intrinsic::riscv_vlseg7:
1789 case Intrinsic::riscv_vlseg8: {
1790 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
1791 return;
1792 }
1793 case Intrinsic::riscv_vlseg2_mask:
1794 case Intrinsic::riscv_vlseg3_mask:
1795 case Intrinsic::riscv_vlseg4_mask:
1796 case Intrinsic::riscv_vlseg5_mask:
1797 case Intrinsic::riscv_vlseg6_mask:
1798 case Intrinsic::riscv_vlseg7_mask:
1799 case Intrinsic::riscv_vlseg8_mask: {
1800 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
1801 return;
1802 }
1803 case Intrinsic::riscv_vlsseg2:
1804 case Intrinsic::riscv_vlsseg3:
1805 case Intrinsic::riscv_vlsseg4:
1806 case Intrinsic::riscv_vlsseg5:
1807 case Intrinsic::riscv_vlsseg6:
1808 case Intrinsic::riscv_vlsseg7:
1809 case Intrinsic::riscv_vlsseg8: {
1810 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
1811 return;
1812 }
1813 case Intrinsic::riscv_vlsseg2_mask:
1814 case Intrinsic::riscv_vlsseg3_mask:
1815 case Intrinsic::riscv_vlsseg4_mask:
1816 case Intrinsic::riscv_vlsseg5_mask:
1817 case Intrinsic::riscv_vlsseg6_mask:
1818 case Intrinsic::riscv_vlsseg7_mask:
1819 case Intrinsic::riscv_vlsseg8_mask: {
1820 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
1821 return;
1822 }
1823 case Intrinsic::riscv_vloxseg2:
1824 case Intrinsic::riscv_vloxseg3:
1825 case Intrinsic::riscv_vloxseg4:
1826 case Intrinsic::riscv_vloxseg5:
1827 case Intrinsic::riscv_vloxseg6:
1828 case Intrinsic::riscv_vloxseg7:
1829 case Intrinsic::riscv_vloxseg8:
1830 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
1831 return;
1832 case Intrinsic::riscv_vluxseg2:
1833 case Intrinsic::riscv_vluxseg3:
1834 case Intrinsic::riscv_vluxseg4:
1835 case Intrinsic::riscv_vluxseg5:
1836 case Intrinsic::riscv_vluxseg6:
1837 case Intrinsic::riscv_vluxseg7:
1838 case Intrinsic::riscv_vluxseg8:
1839 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
1840 return;
1841 case Intrinsic::riscv_vloxseg2_mask:
1842 case Intrinsic::riscv_vloxseg3_mask:
1843 case Intrinsic::riscv_vloxseg4_mask:
1844 case Intrinsic::riscv_vloxseg5_mask:
1845 case Intrinsic::riscv_vloxseg6_mask:
1846 case Intrinsic::riscv_vloxseg7_mask:
1847 case Intrinsic::riscv_vloxseg8_mask:
1848 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
1849 return;
1850 case Intrinsic::riscv_vluxseg2_mask:
1851 case Intrinsic::riscv_vluxseg3_mask:
1852 case Intrinsic::riscv_vluxseg4_mask:
1853 case Intrinsic::riscv_vluxseg5_mask:
1854 case Intrinsic::riscv_vluxseg6_mask:
1855 case Intrinsic::riscv_vluxseg7_mask:
1856 case Intrinsic::riscv_vluxseg8_mask:
1857 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
1858 return;
1859 case Intrinsic::riscv_vlseg8ff:
1860 case Intrinsic::riscv_vlseg7ff:
1861 case Intrinsic::riscv_vlseg6ff:
1862 case Intrinsic::riscv_vlseg5ff:
1863 case Intrinsic::riscv_vlseg4ff:
1864 case Intrinsic::riscv_vlseg3ff:
1865 case Intrinsic::riscv_vlseg2ff: {
1866 selectVLSEGFF(Node, /*IsMasked*/ false);
1867 return;
1868 }
1869 case Intrinsic::riscv_vlseg8ff_mask:
1870 case Intrinsic::riscv_vlseg7ff_mask:
1871 case Intrinsic::riscv_vlseg6ff_mask:
1872 case Intrinsic::riscv_vlseg5ff_mask:
1873 case Intrinsic::riscv_vlseg4ff_mask:
1874 case Intrinsic::riscv_vlseg3ff_mask:
1875 case Intrinsic::riscv_vlseg2ff_mask: {
1876 selectVLSEGFF(Node, /*IsMasked*/ true);
1877 return;
1878 }
1879 case Intrinsic::riscv_vloxei:
1880 case Intrinsic::riscv_vloxei_mask:
1881 case Intrinsic::riscv_vluxei:
1882 case Intrinsic::riscv_vluxei_mask: {
1883 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1884 IntNo == Intrinsic::riscv_vluxei_mask;
1885 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1886 IntNo == Intrinsic::riscv_vloxei_mask;
1887
1888 MVT VT = Node->getSimpleValueType(0);
1889 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1890
1891 unsigned CurOp = 2;
1892 SmallVector<SDValue, 8> Operands;
1893 Operands.push_back(Node->getOperand(CurOp++));
1894
1895 MVT IndexVT;
1896 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1897 /*IsStridedOrIndexed*/ true, Operands,
1898 /*IsLoad=*/true, &IndexVT);
1899
1900 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1901 "Element count mismatch");
1902
1903 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1904 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1905 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1906 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1907 report_fatal_error("The V extension does not support EEW=64 for index "
1908 "values when XLEN=32");
1909 }
1910 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1911 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1912 static_cast<unsigned>(IndexLMUL));
1913 MachineSDNode *Load =
1914 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1915
1916 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1917 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1918
1919 ReplaceNode(Node, Load);
1920 return;
1921 }
1922 case Intrinsic::riscv_vlm:
1923 case Intrinsic::riscv_vle:
1924 case Intrinsic::riscv_vle_mask:
1925 case Intrinsic::riscv_vlse:
1926 case Intrinsic::riscv_vlse_mask: {
1927 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1928 IntNo == Intrinsic::riscv_vlse_mask;
1929 bool IsStrided =
1930 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1931
1932 MVT VT = Node->getSimpleValueType(0);
1933 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1934
1935 // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1936 // operand at the IR level. In pseudos, it has both a policy and a
1937 // passthru operand. The passthru operand is needed to track the
1938 // "tail undefined" state, and the policy is there just for
1939 // consistency - it will always be "don't care" for the
1940 // unmasked form.
1941 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
1942 unsigned CurOp = 2;
1943 SmallVector<SDValue, 8> Operands;
1944 if (HasPassthruOperand)
1945 Operands.push_back(Node->getOperand(CurOp++));
1946 else {
1947 // We eagerly lower to implicit_def (instead of undef), as we
1948 // otherwise fail to select nodes such as: nxv1i1 = undef
1949 SDNode *Passthru =
1950 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
1951 Operands.push_back(SDValue(Passthru, 0));
1952 }
1953 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
1954 Operands, /*IsLoad=*/true);
1955
1956 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1957 const RISCV::VLEPseudo *P =
1958 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
1959 static_cast<unsigned>(LMUL));
1960 MachineSDNode *Load =
1961 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1962
1963 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1964 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1965
1966 ReplaceNode(Node, Load);
1967 return;
1968 }
1969 case Intrinsic::riscv_vleff:
1970 case Intrinsic::riscv_vleff_mask: {
1971 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;
1972
1973 MVT VT = Node->getSimpleValueType(0);
1974 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1975
1976 unsigned CurOp = 2;
1977 SmallVector<SDValue, 7> Operands;
1978 Operands.push_back(Node->getOperand(CurOp++));
1979 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1980 /*IsStridedOrIndexed*/ false, Operands,
1981 /*IsLoad=*/true);
1982
1983 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1984 const RISCV::VLEPseudo *P =
1985 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
1986 Log2SEW, static_cast<unsigned>(LMUL));
1987 MachineSDNode *Load = CurDAG->getMachineNode(
1988 P->Pseudo, DL, Node->getVTList(), Operands);
1989 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1990 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1991
1992 ReplaceNode(Node, Load);
1993 return;
1994 }
1995 }
1996 break;
1997 }
1998 case ISD::INTRINSIC_VOID: {
1999 unsigned IntNo = Node->getConstantOperandVal(1);
2000 switch (IntNo) {
2001 case Intrinsic::riscv_vsseg2:
2002 case Intrinsic::riscv_vsseg3:
2003 case Intrinsic::riscv_vsseg4:
2004 case Intrinsic::riscv_vsseg5:
2005 case Intrinsic::riscv_vsseg6:
2006 case Intrinsic::riscv_vsseg7:
2007 case Intrinsic::riscv_vsseg8: {
2008 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
2009 return;
2010 }
2011 case Intrinsic::riscv_vsseg2_mask:
2012 case Intrinsic::riscv_vsseg3_mask:
2013 case Intrinsic::riscv_vsseg4_mask:
2014 case Intrinsic::riscv_vsseg5_mask:
2015 case Intrinsic::riscv_vsseg6_mask:
2016 case Intrinsic::riscv_vsseg7_mask:
2017 case Intrinsic::riscv_vsseg8_mask: {
2018 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
2019 return;
2020 }
2021 case Intrinsic::riscv_vssseg2:
2022 case Intrinsic::riscv_vssseg3:
2023 case Intrinsic::riscv_vssseg4:
2024 case Intrinsic::riscv_vssseg5:
2025 case Intrinsic::riscv_vssseg6:
2026 case Intrinsic::riscv_vssseg7:
2027 case Intrinsic::riscv_vssseg8: {
2028 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
2029 return;
2030 }
2031 case Intrinsic::riscv_vssseg2_mask:
2032 case Intrinsic::riscv_vssseg3_mask:
2033 case Intrinsic::riscv_vssseg4_mask:
2034 case Intrinsic::riscv_vssseg5_mask:
2035 case Intrinsic::riscv_vssseg6_mask:
2036 case Intrinsic::riscv_vssseg7_mask:
2037 case Intrinsic::riscv_vssseg8_mask: {
2038 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
2039 return;
2040 }
2041 case Intrinsic::riscv_vsoxseg2:
2042 case Intrinsic::riscv_vsoxseg3:
2043 case Intrinsic::riscv_vsoxseg4:
2044 case Intrinsic::riscv_vsoxseg5:
2045 case Intrinsic::riscv_vsoxseg6:
2046 case Intrinsic::riscv_vsoxseg7:
2047 case Intrinsic::riscv_vsoxseg8:
2048 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
2049 return;
2050 case Intrinsic::riscv_vsuxseg2:
2051 case Intrinsic::riscv_vsuxseg3:
2052 case Intrinsic::riscv_vsuxseg4:
2053 case Intrinsic::riscv_vsuxseg5:
2054 case Intrinsic::riscv_vsuxseg6:
2055 case Intrinsic::riscv_vsuxseg7:
2056 case Intrinsic::riscv_vsuxseg8:
2057 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
2058 return;
2059 case Intrinsic::riscv_vsoxseg2_mask:
2060 case Intrinsic::riscv_vsoxseg3_mask:
2061 case Intrinsic::riscv_vsoxseg4_mask:
2062 case Intrinsic::riscv_vsoxseg5_mask:
2063 case Intrinsic::riscv_vsoxseg6_mask:
2064 case Intrinsic::riscv_vsoxseg7_mask:
2065 case Intrinsic::riscv_vsoxseg8_mask:
2066 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
2067 return;
2068 case Intrinsic::riscv_vsuxseg2_mask:
2069 case Intrinsic::riscv_vsuxseg3_mask:
2070 case Intrinsic::riscv_vsuxseg4_mask:
2071 case Intrinsic::riscv_vsuxseg5_mask:
2072 case Intrinsic::riscv_vsuxseg6_mask:
2073 case Intrinsic::riscv_vsuxseg7_mask:
2074 case Intrinsic::riscv_vsuxseg8_mask:
2075 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
2076 return;
2077 case Intrinsic::riscv_vsoxei:
2078 case Intrinsic::riscv_vsoxei_mask:
2079 case Intrinsic::riscv_vsuxei:
2080 case Intrinsic::riscv_vsuxei_mask: {
2081 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask ||
2082 IntNo == Intrinsic::riscv_vsuxei_mask;
2083 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei ||
2084 IntNo == Intrinsic::riscv_vsoxei_mask;
2085
2086 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2087 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2088
2089 unsigned CurOp = 2;
2090 SmallVector<SDValue, 8> Operands;
2091 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2092
2093 MVT IndexVT;
2094 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
2095 /*IsStridedOrIndexed*/ true, Operands,
2096 /*IsLoad=*/false, &IndexVT);
2097
2098 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
2099 "Element count mismatch");
2100
2101 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2102 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
2103 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
2104 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
2105 report_fatal_error("The V extension does not support EEW=64 for index "
2106 "values when XLEN=32");
2107 }
2108 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
2109 IsMasked, IsOrdered, IndexLog2EEW,
2110 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
2111 MachineSDNode *Store =
2112 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2113
2114 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2115 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2116
2117 ReplaceNode(Node, Store);
2118 return;
2119 }
2120 case Intrinsic::riscv_vsm:
2121 case Intrinsic::riscv_vse:
2122 case Intrinsic::riscv_vse_mask:
2123 case Intrinsic::riscv_vsse:
2124 case Intrinsic::riscv_vsse_mask: {
2125 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask ||
2126 IntNo == Intrinsic::riscv_vsse_mask;
2127 bool IsStrided =
2128 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask;
2129
2130 MVT VT = Node->getOperand(2)->getSimpleValueType(0);
2131 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2132
2133 unsigned CurOp = 2;
2134 SmallVector<SDValue, 8> Operands;
2135 Operands.push_back(Node->getOperand(CurOp++)); // Store value.
2136
2137 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
2138 Operands);
2139
2140 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2141 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo(
2142 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
2143 MachineSDNode *Store =
2144 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
2145 if (auto *MemOp = dyn_cast<MemSDNode>(Node))
2146 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});
2147
2148 ReplaceNode(Node, Store);
2149 return;
2150 }
2151 case Intrinsic::riscv_sf_vc_x_se:
2152 case Intrinsic::riscv_sf_vc_i_se:
2153 selectSF_VC_X_SE(Node);
2154 return;
2155 }
2156 break;
2157 }
2158 case ISD::BITCAST: {
2159 MVT SrcVT = Node->getOperand(0).getSimpleValueType();
2160 // Just drop bitcasts between vectors if both are fixed or both are
2161 // scalable.
2162 if ((VT.isScalableVector() && SrcVT.isScalableVector()) ||
2163 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) {
2164 ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
2165 CurDAG->RemoveDeadNode(Node);
2166 return;
2167 }
2168 break;
2169 }
2170 case ISD::INSERT_SUBVECTOR: {
2171 SDValue V = Node->getOperand(0);
2172 SDValue SubV = Node->getOperand(1);
2173 SDLoc DL(SubV);
2174 auto Idx = Node->getConstantOperandVal(2);
2175 MVT SubVecVT = SubV.getSimpleValueType();
2176
2177 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2178 MVT SubVecContainerVT = SubVecVT;
2179 // Establish the correct scalable-vector types for any fixed-length type.
2180 if (SubVecVT.isFixedLengthVector()) {
2181 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
2182 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
2183 [[maybe_unused]] bool ExactlyVecRegSized =
2184 Subtarget->expandVScale(SubVecVT.getSizeInBits())
2185 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
2186 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
2187 .getKnownMinValue()));
2188 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
2189 }
2190 MVT ContainerVT = VT;
2191 if (VT.isFixedLengthVector())
2192 ContainerVT = TLI.getContainerForFixedLengthVector(VT);
2193
2194 const auto *TRI = Subtarget->getRegisterInfo();
2195 unsigned SubRegIdx;
2196 std::tie(SubRegIdx, Idx) =
2197 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2198 ContainerVT, SubVecContainerVT, Idx, TRI);
2199
2200 // If the Idx hasn't been completely eliminated then this is a subvector
2201 // insert which doesn't naturally align to a vector register. These must
2202 // be handled using instructions to manipulate the vector registers.
2203 if (Idx != 0)
2204 break;
2205
2206 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
2207 [[maybe_unused]] bool IsSubVecPartReg =
2208 SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
2209 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
2210 SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
2211 assert((!IsSubVecPartReg || V.isUndef()) &&
2212 "Expecting lowering to have created legal INSERT_SUBVECTORs when "
2213 "the subvector is smaller than a full-sized register");
2214
2215 // If we haven't set a SubRegIdx, then we must be going between
2216 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
2217 if (SubRegIdx == RISCV::NoSubRegister) {
2218 unsigned InRegClassID =
2219 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT);
2220 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2221 InRegClassID &&
2222 "Unexpected subvector extraction");
2223 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2224 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2225 DL, VT, SubV, RC);
2226 ReplaceNode(Node, NewNode);
2227 return;
2228 }
2229
2230 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
2231 ReplaceNode(Node, Insert.getNode());
2232 return;
2233 }
2234 case ISD::EXTRACT_SUBVECTOR: {
2235 SDValue V = Node->getOperand(0);
2236 auto Idx = Node->getConstantOperandVal(1);
2237 MVT InVT = V.getSimpleValueType();
2238 SDLoc DL(V);
2239
2240 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
2241 MVT SubVecContainerVT = VT;
2242 // Establish the correct scalable-vector types for any fixed-length type.
2243 if (VT.isFixedLengthVector()) {
2244 assert(Idx == 0);
2245 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
2246 }
2247 if (InVT.isFixedLengthVector())
2248 InVT = TLI.getContainerForFixedLengthVector(InVT);
2249
2250 const auto *TRI = Subtarget->getRegisterInfo();
2251 unsigned SubRegIdx;
2252 std::tie(SubRegIdx, Idx) =
2253 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2254 InVT, SubVecContainerVT, Idx, TRI);
2255
2256 // If the Idx hasn't been completely eliminated then this is a subvector
2257 // extract which doesn't naturally align to a vector register. These must
2258 // be handled using instructions to manipulate the vector registers.
2259 if (Idx != 0)
2260 break;
2261
2262 // If we haven't set a SubRegIdx, then we must be going between
2263 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
2264 if (SubRegIdx == RISCV::NoSubRegister) {
2265 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
2266 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
2267 InRegClassID &&
2268 "Unexpected subvector extraction");
2269 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
2270 SDNode *NewNode =
2271 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
2272 ReplaceNode(Node, NewNode);
2273 return;
2274 }
2275
2276 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
2277 ReplaceNode(Node, Extract.getNode());
2278 return;
2279 }
2280 case RISCVISD::VMV_S_X_VL:
2281 case RISCVISD::VFMV_S_F_VL:
2282 case RISCVISD::VMV_V_X_VL:
2283 case RISCVISD::VFMV_V_F_VL: {
2284 // Try to match splat of a scalar load to a strided load with stride of x0.
2285 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
2286 Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
2287 if (!Node->getOperand(0).isUndef())
2288 break;
2289 SDValue Src = Node->getOperand(1);
2290 auto *Ld = dyn_cast<LoadSDNode>(Src);
2291 // Can't fold an indexed load (load update node) because its second
2292 // output is used, so the load update node can't be removed.
2293 if (!Ld || Ld->isIndexed())
2294 break;
2295 EVT MemVT = Ld->getMemoryVT();
2296 // The memory VT should be the same size as the element type.
2297 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
2298 break;
2299 if (!IsProfitableToFold(Src, Node, Node) ||
2300 !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
2301 break;
2302
2303 SDValue VL;
2304 if (IsScalarMove) {
2305 // We could handle more VL values if we updated the VSETVLI insertion
2306 // pass to avoid introducing extra VSETVLIs.
2307 if (!isOneConstant(Node->getOperand(2)))
2308 break;
2309 selectVLOp(Node->getOperand(2), VL);
2310 } else
2311 selectVLOp(Node->getOperand(2), VL);
2312
2313 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
2314 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
2315
2316 // If VL=1, then we don't need to do a strided load and can just do a
2317 // regular load.
2318 bool IsStrided = !isOneConstant(VL);
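// With a stride of x0 every element is loaded from the same address, so the
// strided load behaves as a splat of the scalar at the base pointer.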
2319
2320 // Only do a strided load if we have optimized zero-stride vector load.
2321 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
2322 break;
2323
2324 SmallVector<SDValue> Operands = {
2325 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
2326 Ld->getBasePtr()};
2327 if (IsStrided)
2328 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
2329 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
2330 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
2331 Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
2332
2333 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
2334 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
2335 /*IsMasked*/ false, IsStrided, /*FF*/ false,
2336 Log2SEW, static_cast<unsigned>(LMUL));
2337 MachineSDNode *Load =
2338 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
2339 // Update the chain.
2340 ReplaceUses(Src.getValue(1), SDValue(Load, 1));
2341 // Record the mem-refs
2342 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
2343 // Replace the splat with the vlse.
2344 ReplaceNode(Node, Load);
2345 return;
2346 }
2347 case ISD::PREFETCH:
2348 unsigned Locality = Node->getConstantOperandVal(3);
2349 if (Locality > 2)
2350 break;
2351
2352 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
2353 MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
2354 MMO->setFlags(MachineMemOperand::MONonTemporal);
2355
2356 int NontemporalLevel = 0;
2357 switch (Locality) {
2358 case 0:
2359 NontemporalLevel = 3; // NTL.ALL
2360 break;
2361 case 1:
2362 NontemporalLevel = 1; // NTL.PALL
2363 break;
2364 case 2:
2365 NontemporalLevel = 0; // NTL.P1
2366 break;
2367 default:
2368 llvm_unreachable("unexpected locality value.");
2369 }
2370
2371 if (NontemporalLevel & 0b1)
2372 MMO->setFlags(MONontemporalBit0);
2373 if (NontemporalLevel & 0b10)
2374 MMO->setFlags(MONontemporalBit1);
2375 }
2376 break;
2377 }
2378
2379 // Select the default instruction.
2380 SelectCode(Node);
2381 }
2382
2383 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
2384 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
2385 std::vector<SDValue> &OutOps) {
2386 // Always produce a register and immediate operand, as expected by
2387 // RISCVAsmPrinter::PrintAsmMemoryOperand.
2388 switch (ConstraintID) {
2389 case InlineAsm::ConstraintCode::o:
2390 case InlineAsm::ConstraintCode::m: {
2391 SDValue Op0, Op1;
2392 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1);
2393 assert(Found && "SelectAddrRegImm should always succeed");
2394 OutOps.push_back(Op0);
2395 OutOps.push_back(Op1);
2396 return false;
2397 }
2398 case InlineAsm::ConstraintCode::A:
2399 OutOps.push_back(Op);
2400 OutOps.push_back(
2401 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
2402 return false;
2403 default:
2404 report_fatal_error("Unexpected asm memory constraint " +
2405 InlineAsm::getMemConstraintName(ConstraintID));
2406 }
2407
2408 return true;
2409 }
2410
2411 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
2412 SDValue &Offset) {
2413 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
2414 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
2415 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
2416 return true;
2417 }
2418
2419 return false;
2420 }
2421
2422 // Select a frame index and an optional immediate offset from an ADD or OR.
2423 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
2424 SDValue &Offset) {
2425 if (SelectAddrFrameIndex(Addr, Base, Offset))
2426 return true;
2427
2428 if (!CurDAG->isBaseWithConstantOffset(Addr))
2429 return false;
2430
2431 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
2432 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2433 if (isInt<12>(CVal)) {
2434 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
2435 Subtarget->getXLenVT());
2436 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
2437 Subtarget->getXLenVT());
2438 return true;
2439 }
2440 }
2441
2442 return false;
2443 }
2444
2445 // Fold constant addresses.
2446 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
2447 const MVT VT, const RISCVSubtarget *Subtarget,
2448 SDValue Addr, SDValue &Base, SDValue &Offset,
2449 bool IsPrefetch = false) {
2450 if (!isa<ConstantSDNode>(Addr))
2451 return false;
2452
2453 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
2454
2455 // If the constant is a simm12, we can fold the whole constant and use X0 as
2456 // the base. If the constant can be materialized with LUI+simm12, use LUI as
2457 // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
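// For example, CVal=0x12345678 splits into Lo12=0x678 and Hi=0x12345000,
// giving LUI 0x12345 as the base and 0x678 as the folded offset.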
2458 int64_t Lo12 = SignExtend64<12>(CVal);
2459 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
2460 if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
2461 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2462 return false;
2463
2464 if (Hi) {
2465 int64_t Hi20 = (Hi >> 12) & 0xfffff;
2466 Base = SDValue(
2467 CurDAG->getMachineNode(RISCV::LUI, DL, VT,
2468 CurDAG->getTargetConstant(Hi20, DL, VT)),
2469 0);
2470 } else {
2471 Base = CurDAG->getRegister(RISCV::X0, VT);
2472 }
2473 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2474 return true;
2475 }
2476
2477 // Ask how constant materialization would handle this constant.
2478 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
2479
2480 // If the last instruction would be an ADDI, we can fold its immediate and
2481 // emit the rest of the sequence as the base.
2482 if (Seq.back().getOpcode() != RISCV::ADDI)
2483 return false;
2484 Lo12 = Seq.back().getImm();
2485 if (IsPrefetch && (Lo12 & 0b11111) != 0)
2486 return false;
2487
2488 // Drop the last instruction.
2489 Seq.pop_back();
2490 assert(!Seq.empty() && "Expected more instructions in sequence");
2491
2492 Base = selectImmSeq(CurDAG, DL, VT, Seq);
2493 Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
2494 return true;
2495 }
2496
2497 // Is this ADD instruction only used as the base pointer of scalar loads and
2498 // stores?
2499 static bool isWorthFoldingAdd(SDValue Add) {
2500 for (auto *Use : Add->uses()) {
2501 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
2502 Use->getOpcode() != ISD::ATOMIC_LOAD &&
2503 Use->getOpcode() != ISD::ATOMIC_STORE)
2504 return false;
2505 EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
2506 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
2507 VT != MVT::f64)
2508 return false;
2509 // Don't allow stores of the value. It must be used as the address.
2510 if (Use->getOpcode() == ISD::STORE &&
2511 cast<StoreSDNode>(Use)->getValue() == Add)
2512 return false;
2513 if (Use->getOpcode() == ISD::ATOMIC_STORE &&
2514 cast<AtomicSDNode>(Use)->getVal() == Add)
2515 return false;
2516 }
2517
2518 return true;
2519 }
2520
2521 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
2522 unsigned MaxShiftAmount,
2523 SDValue &Base, SDValue &Index,
2524 SDValue &Scale) {
2525 EVT VT = Addr.getSimpleValueType();
2526 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
2527 SDValue &Shift) {
2528 uint64_t ShiftAmt = 0;
2529 Index = N;
2530
2531 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
2532 // Only match shifts by a value in range [0, MaxShiftAmount].
2533 if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
2534 Index = N.getOperand(0);
2535 ShiftAmt = N.getConstantOperandVal(1);
2536 }
2537 }
2538
2539 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
2540 return ShiftAmt != 0;
2541 };
2542
2543 if (Addr.getOpcode() == ISD::ADD) {
2544 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
2545 SDValue AddrB = Addr.getOperand(0);
2546 if (AddrB.getOpcode() == ISD::ADD &&
2547 UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
2548 !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
2549 isInt<12>(C1->getSExtValue())) {
2550 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
2551 SDValue C1Val =
2552 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
2553 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
2554 AddrB.getOperand(1), C1Val),
2555 0);
2556 return true;
2557 }
2558 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
2559 Base = Addr.getOperand(1);
2560 return true;
2561 } else {
2562 UnwrapShl(Addr.getOperand(1), Index, Scale);
2563 Base = Addr.getOperand(0);
2564 return true;
2565 }
2566 } else if (UnwrapShl(Addr, Index, Scale)) {
2567 EVT VT = Addr.getValueType();
2568 Base = CurDAG->getRegister(RISCV::X0, VT);
2569 return true;
2570 }
2571
2572 return false;
2573 }
2574
2575 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
2576 SDValue &Offset, bool IsINX) {
2577 if (SelectAddrFrameIndex(Addr, Base, Offset))
2578 return true;
2579
2580 SDLoc DL(Addr);
2581 MVT VT = Addr.getSimpleValueType();
2582
2583 if (Addr.getOpcode() == RISCVISD::ADD_LO) {
2584 Base = Addr.getOperand(0);
2585 Offset = Addr.getOperand(1);
2586 return true;
2587 }
2588
2589 int64_t RV32ZdinxRange = IsINX ? 4 : 0;
2590 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2591 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2592 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
2593 Base = Addr.getOperand(0);
2594 if (Base.getOpcode() == RISCVISD::ADD_LO) {
2595 SDValue LoOperand = Base.getOperand(1);
2596 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
2597 // If the Lo in (ADD_LO hi, lo) is a global variable's address
2598 // (its low part, really), then we can rely on the alignment of that
2599 // variable to provide a margin of safety before the low part can overflow
2600 // the 12 bits of the load/store offset. Check if CVal falls within
2601 // that margin; if so (low part + CVal) can't overflow.
2602 const DataLayout &DL = CurDAG->getDataLayout();
2603 Align Alignment = commonAlignment(
2604 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
2605 if (CVal == 0 || Alignment > CVal) {
2606 int64_t CombinedOffset = CVal + GA->getOffset();
2607 Base = Base.getOperand(0);
2608 Offset = CurDAG->getTargetGlobalAddress(
2609 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
2610 CombinedOffset, GA->getTargetFlags());
2611 return true;
2612 }
2613 }
2614 }
2615
2616 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2617 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2618 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2619 return true;
2620 }
2621 }
2622
2623 // Handle ADD with large immediates.
2624 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2625 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2626 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2627 "simm12 not already handled?");
2628
2629 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2630 // an ADDI for part of the offset and fold the rest into the load/store.
2631 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
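// For example, an offset of 3000 becomes ADDI base, 2047 with the remaining
// 953 folded into the load/store immediate.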
2632 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2633 int64_t Adj = CVal < 0 ? -2048 : 2047;
2634 Base = SDValue(
2635 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2636 CurDAG->getTargetConstant(Adj, DL, VT)),
2637 0);
2638 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2639 return true;
2640 }
2641
2642 // For larger immediates, we might be able to save one instruction from
2643 // constant materialization by folding the Lo12 bits of the immediate into
2644 // the address. We should only do this if the ADD is only used by loads and
2645 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2646 // separately with the full materialized immediate creating extra
2647 // instructions.
2648 if (isWorthFoldingAdd(Addr) &&
2649 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2650 Offset)) {
2651 // Insert an ADD instruction with the materialized Hi52 bits.
2652 Base = SDValue(
2653 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2654 0);
2655 return true;
2656 }
2657 }
2658
2659 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2660 return true;
2661
2662 Base = Addr;
2663 Offset = CurDAG->getTargetConstant(0, DL, VT);
2664 return true;
2665 }
2666
2667 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2668 /// Offset should be all zeros.
2669 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2670 SDValue &Offset) {
2671 if (SelectAddrFrameIndex(Addr, Base, Offset))
2672 return true;
2673
2674 SDLoc DL(Addr);
2675 MVT VT = Addr.getSimpleValueType();
2676
2677 if (CurDAG->isBaseWithConstantOffset(Addr)) {
2678 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2679 if (isInt<12>(CVal)) {
2680 Base = Addr.getOperand(0);
2681
2682 // Early-out if not a valid offset.
2683 if ((CVal & 0b11111) != 0) {
2684 Base = Addr;
2685 Offset = CurDAG->getTargetConstant(0, DL, VT);
2686 return true;
2687 }
2688
2689 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2690 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2691 Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2692 return true;
2693 }
2694 }
2695
2696 // Handle ADD with large immediates.
2697 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2698 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2699 assert(!isInt<12>(CVal) &&
2700 "simm12 not already handled?");
2701
2702 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2703 // one instruction by folding an adjustment (-2048 or 2016) into the address.
2704 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2705 int64_t Adj = CVal < 0 ? -2048 : 2016;
2706 int64_t AdjustedOffset = CVal - Adj;
2707 Base = SDValue(CurDAG->getMachineNode(
2708 RISCV::ADDI, DL, VT, Addr.getOperand(0),
2709 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
2710 0);
2711 Offset = CurDAG->getTargetConstant(Adj, DL, VT);
2712 return true;
2713 }
2714
2715 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2716 Offset, true)) {
2717 // Insert an ADD instruction with the materialized Hi52 bits.
2718 Base = SDValue(
2719 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2720 0);
2721 return true;
2722 }
2723 }
2724
2725 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
2726 return true;
2727
2728 Base = Addr;
2729 Offset = CurDAG->getTargetConstant(0, DL, VT);
2730 return true;
2731 }
2732
2733 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base,
2734 SDValue &Offset) {
2735 if (Addr.getOpcode() != ISD::ADD)
2736 return false;
2737
2738 if (isa<ConstantSDNode>(Addr.getOperand(1)))
2739 return false;
2740
2741 Base = Addr.getOperand(1);
2742 Offset = Addr.getOperand(0);
2743 return true;
2744 }
2745
2746 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
2747 SDValue &ShAmt) {
2748 ShAmt = N;
2749
2750 // Peek through zext.
2751 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
2752 ShAmt = ShAmt.getOperand(0);
2753
2754 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
2755 // amount. If there is an AND on the shift amount, we can bypass it if it
2756 // doesn't affect any of those bits.
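// For example, on RV64 (srl X, (and Y, 63)) can use Y directly since SRL
// reads only the low 6 bits of the shift amount.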
2757 if (ShAmt.getOpcode() == ISD::AND &&
2758 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2759 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
2760
2761 // Since the max shift amount is a power of 2 we can subtract 1 to make a
2762 // mask that covers the bits needed to represent all shift amounts.
2763 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
2764 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);
2765
2766 if (ShMask.isSubsetOf(AndMask)) {
2767 ShAmt = ShAmt.getOperand(0);
2768 } else {
2769 // SimplifyDemandedBits may have optimized the mask so try restoring any
2770 // bits that are known zero.
2771 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0));
2772 if (!ShMask.isSubsetOf(AndMask | Known.Zero))
2773 return true;
2774 ShAmt = ShAmt.getOperand(0);
2775 }
2776 }
2777
2778 if (ShAmt.getOpcode() == ISD::ADD &&
2779 isa<ConstantSDNode>(ShAmt.getOperand(1))) {
2780 uint64_t Imm = ShAmt.getConstantOperandVal(1);
2781 // If we are shifting by X+N where N == 0 mod Size, then just shift by X
2782 // to avoid the ADD.
2783 if (Imm != 0 && Imm % ShiftWidth == 0) {
2784 ShAmt = ShAmt.getOperand(0);
2785 return true;
2786 }
2787 } else if (ShAmt.getOpcode() == ISD::SUB &&
2788 isa<ConstantSDNode>(ShAmt.getOperand(0))) {
2789 uint64_t Imm = ShAmt.getConstantOperandVal(0);
2790 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
2791 // generate a NEG instead of a SUB of a constant.
2792 if (Imm != 0 && Imm % ShiftWidth == 0) {
2793 SDLoc DL(ShAmt);
2794 EVT VT = ShAmt.getValueType();
2795 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
2796 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
2797 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
2798 ShAmt.getOperand(1));
2799 ShAmt = SDValue(Neg, 0);
2800 return true;
2801 }
2802 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
2803 // to generate a NOT instead of a SUB of a constant.
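// (~X equals -1 - X, which is congruent to Imm - X modulo ShiftWidth.)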
2804 if (Imm % ShiftWidth == ShiftWidth - 1) {
2805 SDLoc DL(ShAmt);
2806 EVT VT = ShAmt.getValueType();
2807 MachineSDNode *Not =
2808 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
2809 CurDAG->getTargetConstant(-1, DL, VT));
2810 ShAmt = SDValue(Not, 0);
2811 return true;
2812 }
2813 }
2814
2815 return true;
2816 }
2817
2818 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can
2819 /// check for equality with 0. This function emits instructions that convert the
2820 /// seteq/setne into something that can be compared with 0.
2821 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
2822 /// ISD::SETNE).
2823 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
2824 SDValue &Val) {
2825 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
2826 "Unexpected condition code!");
2827
2828 // We're looking for a setcc.
2829 if (N->getOpcode() != ISD::SETCC)
2830 return false;
2831
2832 // Must be an equality comparison.
2833 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
2834 if (CCVal != ExpectedCCVal)
2835 return false;
2836
2837 SDValue LHS = N->getOperand(0);
2838 SDValue RHS = N->getOperand(1);
2839
2840 if (!LHS.getValueType().isScalarInteger())
2841 return false;
2842
2843 // If the RHS is 0, we don't need any extra instructions; return the LHS.
2844 if (isNullConstant(RHS)) {
2845 Val = LHS;
2846 return true;
2847 }
2848
2849 SDLoc DL(N);
2850
2851 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
2852 int64_t CVal = C->getSExtValue();
2853 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
2854 // non-zero otherwise.
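// (ADDI can't negate -2048 because +2048 is not a valid simm12.)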
2855 if (CVal == -2048) {
2856 Val =
2857 SDValue(CurDAG->getMachineNode(
2858 RISCV::XORI, DL, N->getValueType(0), LHS,
2859 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
2860 0);
2861 return true;
2862 }
2863 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
2864 // LHS is equal to the RHS and non-zero otherwise.
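// (-CVal is a valid simm12 for all of these values, including -2048 when
// CVal == 2048.)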
2865 if (isInt<12>(CVal) || CVal == 2048) {
2866 Val =
2867 SDValue(CurDAG->getMachineNode(
2868 RISCV::ADDI, DL, N->getValueType(0), LHS,
2869 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
2870 0);
2871 return true;
2872 }
2873 }
2874
2875 // If nothing else we can XOR the LHS and RHS to produce zero if they are
2876 // equal and a non-zero value if they aren't.
2877 Val = SDValue(
2878 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2879 return true;
2880 }
2881
2882 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2883 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2884 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2885 Val = N.getOperand(0);
2886 return true;
2887 }
2888
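// (sra (shl X, C), C) is the shift-pair idiom for sign-extending the low
// (width - C) bits of X, so we can look through it when enough sign bits are
// known.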
2889 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2890 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2891 return N;
2892
2893 SDValue N0 = N.getOperand(0);
2894 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2895 N.getConstantOperandVal(1) == ShiftAmt &&
2896 N0.getConstantOperandVal(1) == ShiftAmt)
2897 return N0.getOperand(0);
2898
2899 return N;
2900 };
2901
2902 MVT VT = N.getSimpleValueType();
2903 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2904 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2905 return true;
2906 }
2907
2908 return false;
2909 }
2910
2911 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2912 if (N.getOpcode() == ISD::AND) {
2913 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2914 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2915 Val = N.getOperand(0);
2916 return true;
2917 }
2918 }
2919 MVT VT = N.getSimpleValueType();
2920 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2921 if (CurDAG->MaskedValueIsZero(N, Mask)) {
2922 Val = N;
2923 return true;
2924 }
2925
2926 return false;
2927 }
2928
2929 /// Look for various patterns that can be done with a SHL that can be folded
2930 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2931 /// SHXADD we are trying to match.
2932 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2933 SDValue &Val) {
2934 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2935 SDValue N0 = N.getOperand(0);
2936
2937 bool LeftShift = N0.getOpcode() == ISD::SHL;
2938 if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2939 isa<ConstantSDNode>(N0.getOperand(1))) {
2940 uint64_t Mask = N.getConstantOperandVal(1);
2941 unsigned C2 = N0.getConstantOperandVal(1);
2942
2943 unsigned XLen = Subtarget->getXLen();
2944 if (LeftShift)
2945 Mask &= maskTrailingZeros<uint64_t>(C2);
2946 else
2947 Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2948
2949 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2950 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2951 // followed by a SHXADD with c3 for the X amount.
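// For example with ShAmt=3: (and (shl y, 1), ~7) becomes (shl (srl y, 2), 3),
// so SH3ADD can consume (srl y, 2) directly.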
2952 if (isShiftedMask_64(Mask)) {
2953 unsigned Leading = XLen - llvm::bit_width(Mask);
2954 unsigned Trailing = llvm::countr_zero(Mask);
2955 if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2956 SDLoc DL(N);
2957 EVT VT = N.getValueType();
2958 Val = SDValue(CurDAG->getMachineNode(
2959 RISCV::SRLI, DL, VT, N0.getOperand(0),
2960 CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2961 0);
2962 return true;
2963 }
2964 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2965 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2966 // followed by a SHXADD using c3 for the X amount.
2967 if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
2968 SDLoc DL(N);
2969 EVT VT = N.getValueType();
2970 Val = SDValue(
2971 CurDAG->getMachineNode(
2972 RISCV::SRLI, DL, VT, N0.getOperand(0),
2973 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
2974 0);
2975 return true;
2976 }
2977 }
2978 }
2979 }
2980
2981 bool LeftShift = N.getOpcode() == ISD::SHL;
2982 if ((LeftShift || N.getOpcode() == ISD::SRL) &&
2983 isa<ConstantSDNode>(N.getOperand(1))) {
2984 SDValue N0 = N.getOperand(0);
2985 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
2986 isa<ConstantSDNode>(N0.getOperand(1))) {
2987 uint64_t Mask = N0.getConstantOperandVal(1);
2988 if (isShiftedMask_64(Mask)) {
2989 unsigned C1 = N.getConstantOperandVal(1);
2990 unsigned XLen = Subtarget->getXLen();
2991 unsigned Leading = XLen - llvm::bit_width(Mask);
2992 unsigned Trailing = llvm::countr_zero(Mask);
2993 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
2994 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
2995 if (LeftShift && Leading == 32 && Trailing > 0 &&
2996 (Trailing + C1) == ShAmt) {
2997 SDLoc DL(N);
2998 EVT VT = N.getValueType();
2999 Val = SDValue(CurDAG->getMachineNode(
3000 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3001 CurDAG->getTargetConstant(Trailing, DL, VT)),
3002 0);
3003 return true;
3004 }
3005 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
3006 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
3007 if (!LeftShift && Leading == 32 && Trailing > C1 &&
3008 (Trailing - C1) == ShAmt) {
3009 SDLoc DL(N);
3010 EVT VT = N.getValueType();
3011 Val = SDValue(CurDAG->getMachineNode(
3012 RISCV::SRLIW, DL, VT, N0.getOperand(0),
3013 CurDAG->getTargetConstant(Trailing, DL, VT)),
3014 0);
3015 return true;
3016 }
3017 }
3018 }
3019 }
3020
3021 return false;
3022 }
3023
3024 /// Look for various patterns that can be done with a SHL that can be folded
3025 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which
3026 /// SHXADD_UW we are trying to match.
3027 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
3028 SDValue &Val) {
3029 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) &&
3030 N.hasOneUse()) {
3031 SDValue N0 = N.getOperand(0);
3032 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
3033 N0.hasOneUse()) {
3034 uint64_t Mask = N.getConstantOperandVal(1);
3035 unsigned C2 = N0.getConstantOperandVal(1);
3036
3037 Mask &= maskTrailingZeros<uint64_t>(C2);
3038
3039 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with
3040 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
3041 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
3042 if (isShiftedMask_64(Mask)) {
3043 unsigned Leading = llvm::countl_zero(Mask);
3044 unsigned Trailing = llvm::countr_zero(Mask);
3045 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
3046 SDLoc DL(N);
3047 EVT VT = N.getValueType();
3048 Val = SDValue(CurDAG->getMachineNode(
3049 RISCV::SLLI, DL, VT, N0.getOperand(0),
3050 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)),
3051 0);
3052 return true;
3053 }
3054 }
3055 }
3056 }
3057
3058 return false;
3059 }
3060
3061 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
3062 unsigned Bits,
3063 const TargetInstrInfo *TII) {
3064 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
3065
3066 if (!MCOpcode)
3067 return false;
3068
3069 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
3070 const uint64_t TSFlags = MCID.TSFlags;
3071 if (!RISCVII::hasSEWOp(TSFlags))
3072 return false;
3073 assert(RISCVII::hasVLOp(TSFlags));
3074
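// Locate the VL operand by walking back from the end of the operand list,
// skipping any glue, chain and policy operands; the SEW operand immediately
// follows VL.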
3075 bool HasGlueOp = User->getGluedNode() != nullptr;
3076 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
3077 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
3078 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
3079 unsigned VLIdx =
3080 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3081 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
3082
3083 if (UserOpNo == VLIdx)
3084 return false;
3085
3086 auto NumDemandedBits =
3087 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
3088 return NumDemandedBits && Bits >= *NumDemandedBits;
3089 }
3090
3091 // Return true if all users of this SDNode* only consume the lower \p Bits.
3092 // This can be used to form W instructions for add/sub/mul/shl even when the
3093 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
3094 // SimplifyDemandedBits has made it so some users see a sext_inreg and some
3095 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
3096 // the add/sub/mul/shl to become non-W instructions. By checking the users we
3097 // may be able to use a W instruction and CSE with the other instruction if
3098 // this has happened. We could try to detect that the CSE opportunity exists
3099 // before doing this, but that would be more complicated.
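// Illustrative example: if an i64 add's only users are ADDW nodes and SW
// stores of its value, every user consumes just the low 32 bits, so the add
// itself can be selected as ADDW even though there is no sext_inreg at the
// root.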
3100 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
3101 const unsigned Depth) const {
3102 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
3103 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
3104 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
3105 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
3106 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
3107 isa<ConstantSDNode>(Node) || Depth != 0) &&
3108 "Unexpected opcode");
3109
3110 if (Depth >= SelectionDAG::MaxRecursionDepth)
3111 return false;
3112
3113 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
3114 // the VT. Ensure the type is scalar to avoid wasting time on vectors.
3115 if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
3116 return false;
3117
3118 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
3119 SDNode *User = *UI;
3120 // Users of this node should have already been instruction selected
3121 if (!User->isMachineOpcode())
3122 return false;
3123
3124 // TODO: Add more opcodes?
3125 switch (User->getMachineOpcode()) {
3126 default:
3127 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
3128 break;
3129 return false;
3130 case RISCV::ADDW:
3131 case RISCV::ADDIW:
3132 case RISCV::SUBW:
3133 case RISCV::MULW:
3134 case RISCV::SLLW:
3135 case RISCV::SLLIW:
3136 case RISCV::SRAW:
3137 case RISCV::SRAIW:
3138 case RISCV::SRLW:
3139 case RISCV::SRLIW:
3140 case RISCV::DIVW:
3141 case RISCV::DIVUW:
3142 case RISCV::REMW:
3143 case RISCV::REMUW:
3144 case RISCV::ROLW:
3145 case RISCV::RORW:
3146 case RISCV::RORIW:
3147 case RISCV::CLZW:
3148 case RISCV::CTZW:
3149 case RISCV::CPOPW:
3150 case RISCV::SLLI_UW:
3151 case RISCV::FMV_W_X:
3152 case RISCV::FCVT_H_W:
3153 case RISCV::FCVT_H_WU:
3154 case RISCV::FCVT_S_W:
3155 case RISCV::FCVT_S_WU:
3156 case RISCV::FCVT_D_W:
3157 case RISCV::FCVT_D_WU:
3158 case RISCV::TH_REVW:
3159 case RISCV::TH_SRRIW:
3160 if (Bits < 32)
3161 return false;
3162 break;
3163 case RISCV::SLL:
3164 case RISCV::SRA:
3165 case RISCV::SRL:
3166 case RISCV::ROL:
3167 case RISCV::ROR:
3168 case RISCV::BSET:
3169 case RISCV::BCLR:
3170 case RISCV::BINV:
3171       // Shift amount operands only use log2(XLen) bits.
3172 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen()))
3173 return false;
3174 break;
3175 case RISCV::SLLI:
3176 // SLLI only uses the lower (XLen - ShAmt) bits.
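      // E.g. on RV64, a user SLLI with shamt 40 only reads the low 24 bits of
      // its source, so Bits >= 24 is sufficient for that user.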
3177 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
3178 return false;
3179 break;
3180 case RISCV::ANDI:
3181 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
3182 break;
3183 goto RecCheck;
3184 case RISCV::ORI: {
3185 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
3186 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
3187 break;
3188 [[fallthrough]];
3189 }
3190 case RISCV::AND:
3191 case RISCV::OR:
3192 case RISCV::XOR:
3193 case RISCV::XORI:
3194 case RISCV::ANDN:
3195 case RISCV::ORN:
3196 case RISCV::XNOR:
3197 case RISCV::SH1ADD:
3198 case RISCV::SH2ADD:
3199 case RISCV::SH3ADD:
3200 RecCheck:
3201 if (hasAllNBitUsers(User, Bits, Depth + 1))
3202 break;
3203 return false;
3204 case RISCV::SRLI: {
3205 unsigned ShAmt = User->getConstantOperandVal(1);
3206 // If we are shifting right by less than Bits, and users don't demand any
3207 // bits that were shifted into [Bits-1:0], then we can consider this as an
3208 // N-Bit user.
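      // E.g. with Bits == 32 and a SRLI by 8, the source's bits [63:32] land
      // in bits [55:24] of the shift result, so it suffices that all users of
      // the SRLI demand only its low 24 bits.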
3209 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
3210 break;
3211 return false;
3212 }
3213 case RISCV::SEXT_B:
3214 case RISCV::PACKH:
3215 if (Bits < 8)
3216 return false;
3217 break;
3218 case RISCV::SEXT_H:
3219 case RISCV::FMV_H_X:
3220 case RISCV::ZEXT_H_RV32:
3221 case RISCV::ZEXT_H_RV64:
3222 case RISCV::PACKW:
3223 if (Bits < 16)
3224 return false;
3225 break;
3226 case RISCV::PACK:
3227 if (Bits < (Subtarget->getXLen() / 2))
3228 return false;
3229 break;
3230 case RISCV::ADD_UW:
3231 case RISCV::SH1ADD_UW:
3232 case RISCV::SH2ADD_UW:
3233 case RISCV::SH3ADD_UW:
3234 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
3235 // 32 bits.
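      // E.g. add.uw rd, rs1, rs2 computes zext32(rs1) + rs2, so only the low
      // 32 bits of the first source register matter.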
3236 if (UI.getOperandNo() != 0 || Bits < 32)
3237 return false;
3238 break;
3239 case RISCV::SB:
3240 if (UI.getOperandNo() != 0 || Bits < 8)
3241 return false;
3242 break;
3243 case RISCV::SH:
3244 if (UI.getOperandNo() != 0 || Bits < 16)
3245 return false;
3246 break;
3247 case RISCV::SW:
3248 if (UI.getOperandNo() != 0 || Bits < 32)
3249 return false;
3250 break;
3251 }
3252 }
3253
3254 return true;
3255 }
3256
3257 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
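// E.g. 96 is representable as 12 << 3 and -64 as -8 << 3, while 256 is not
// (256 >> 3 == 32 does not fit in a signed 5-bit immediate).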
3258 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3259 SDValue &Shl2) {
3260 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3261 int64_t Offset = C->getSExtValue();
3262 int64_t Shift;
3263 for (Shift = 0; Shift < 4; Shift++)
3264 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3265 break;
3266
3267 // Constant cannot be encoded.
3268 if (Shift == 4)
3269 return false;
3270
3271 EVT Ty = N->getValueType(0);
3272 Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3273 Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3274 return true;
3275 }
3276
3277 return false;
3278 }
3279
3280 // Select VL as a 5 bit immediate or a value that will become a register. This
3281 // allows us to choose between VSETIVLI and VSETVLI later.
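// For example, a constant VL of 7 stays an immediate (usable by VSETIVLI),
// while X0 or an all-ones constant becomes the VLMaxSentinel immediate, and a
// non-constant VL is passed through as a register operand.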
3282 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3283 auto *C = dyn_cast<ConstantSDNode>(N);
3284 if (C && isUInt<5>(C->getZExtValue())) {
3285 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3286 N->getValueType(0));
3287 } else if (C && C->isAllOnes()) {
3288 // Treat all ones as VLMax.
3289 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3290 N->getValueType(0));
3291 } else if (isa<RegisterSDNode>(N) &&
3292 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3293 // All our VL operands use an operand that allows GPRNoX0 or an immediate
3294 // as the register class. Convert X0 to a special immediate to pass the
3295 // MachineVerifier. This is recognized specially by the vsetvli insertion
3296 // pass.
3297 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3298 N->getValueType(0));
3299 } else {
3300 VL = N;
3301 }
3302
3303 return true;
3304 }
3305
3306 static SDValue findVSplat(SDValue N) {
3307 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
3308 if (!N.getOperand(0).isUndef())
3309 return SDValue();
3310 N = N.getOperand(1);
3311 }
3312 SDValue Splat = N;
3313 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
3314 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
3315 !Splat.getOperand(0).isUndef())
3316 return SDValue();
3317 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
3318 return Splat;
3319 }
3320
3321 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
3322 SDValue Splat = findVSplat(N);
3323 if (!Splat)
3324 return false;
3325
3326 SplatVal = Splat.getOperand(1);
3327 return true;
3328 }
3329
3330 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
3331 SelectionDAG &DAG,
3332 const RISCVSubtarget &Subtarget,
3333 std::function<bool(int64_t)> ValidateImm) {
3334 SDValue Splat = findVSplat(N);
3335 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
3336 return false;
3337
3338 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
3339 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
3340 "Unexpected splat operand type");
3341
3342   // The semantics of RISCVISD::VMV_V_X_VL are that, when the operand
3343   // type is wider than the resulting vector element type, an implicit
3344 // truncation first takes place. Therefore, perform a manual
3345 // truncation/sign-extension in order to ignore any truncated bits and catch
3346 // any zero-extended immediate.
3347 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
3348 // sign-extending to (XLenVT -1).
3349 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
3350
3351 int64_t SplatImm = SplatConst.getSExtValue();
3352
3353 if (!ValidateImm(SplatImm))
3354 return false;
3355
3356 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
3357 return true;
3358 }
3359
3360 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
3361 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
3362 [](int64_t Imm) { return isInt<5>(Imm); });
3363 }
3364
3365 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
3366 return selectVSplatImmHelper(
3367 N, SplatVal, *CurDAG, *Subtarget,
3368 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
3369 }
3370
3371 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
3372 SDValue &SplatVal) {
3373 return selectVSplatImmHelper(
3374 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
3375 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
3376 });
3377 }
3378
3379 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
3380 SDValue &SplatVal) {
3381 return selectVSplatImmHelper(
3382 N, SplatVal, *CurDAG, *Subtarget,
3383 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
3384 }
3385
3386 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
3387 auto IsExtOrTrunc = [](SDValue N) {
3388 switch (N->getOpcode()) {
3389 case ISD::SIGN_EXTEND:
3390 case ISD::ZERO_EXTEND:
3391 // There's no passthru on these _VL nodes so any VL/mask is ok, since any
3392 // inactive elements will be undef.
3393 case RISCVISD::TRUNCATE_VECTOR_VL:
3394 case RISCVISD::VSEXT_VL:
3395 case RISCVISD::VZEXT_VL:
3396 return true;
3397 default:
3398 return false;
3399 }
3400 };
3401
3402 // We can have multiple nested nodes, so unravel them all if needed.
3403 while (IsExtOrTrunc(N)) {
3404 if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
3405 return false;
3406 N = N->getOperand(0);
3407 }
3408
3409 return selectVSplat(N, SplatVal);
3410 }
3411
3412 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
3413 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
3414 if (!CFP)
3415 return false;
3416 const APFloat &APF = CFP->getValueAPF();
3417 // td can handle +0.0 already.
3418 if (APF.isPosZero())
3419 return false;
3420
3421 MVT VT = CFP->getSimpleValueType(0);
3422
3423 // Even if this FPImm requires an additional FNEG (i.e. the second element of
3424 // the returned pair is true) we still prefer FLI + FNEG over immediate
3425 // materialization as the latter might generate a longer instruction sequence.
3426 if (static_cast<const RISCVTargetLowering *>(TLI)
3427 ->getLegalZfaFPImm(APF, VT)
3428 .first >= 0)
3429 return false;
3430
3431 MVT XLenVT = Subtarget->getXLenVT();
3432 if (VT == MVT::f64 && !Subtarget->is64Bit()) {
3433 assert(APF.isNegZero() && "Unexpected constant.");
3434 return false;
3435 }
3436 SDLoc DL(N);
3437 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
3438 *Subtarget);
3439 return true;
3440 }
3441
3442 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
3443 SDValue &Imm) {
3444 if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3445 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
3446
3447 if (!isInt<5>(ImmVal))
3448 return false;
3449
3450 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
3451 return true;
3452 }
3453
3454 return false;
3455 }
3456
3457 // Try to remove sext.w if the input is a W instruction or can be made into
3458 // a W instruction cheaply.
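// Illustrative example (register names arbitrary):
//   add    a0, a1, a2
//   sext.w a0, a0        ; selected as ADDIW a0, a0, 0
// can instead be emitted as
//   addw   a0, a1, a2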
3459 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
3460 // Look for the sext.w pattern, addiw rd, rs1, 0.
3461 if (N->getMachineOpcode() != RISCV::ADDIW ||
3462 !isNullConstant(N->getOperand(1)))
3463 return false;
3464
3465 SDValue N0 = N->getOperand(0);
3466 if (!N0.isMachineOpcode())
3467 return false;
3468
3469 switch (N0.getMachineOpcode()) {
3470 default:
3471 break;
3472 case RISCV::ADD:
3473 case RISCV::ADDI:
3474 case RISCV::SUB:
3475 case RISCV::MUL:
3476 case RISCV::SLLI: {
3477 // Convert sext.w+add/sub/mul to their W instructions. This will create
3478 // a new independent instruction. This improves latency.
3479 unsigned Opc;
3480 switch (N0.getMachineOpcode()) {
3481 default:
3482 llvm_unreachable("Unexpected opcode!");
3483 case RISCV::ADD: Opc = RISCV::ADDW; break;
3484 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
3485 case RISCV::SUB: Opc = RISCV::SUBW; break;
3486 case RISCV::MUL: Opc = RISCV::MULW; break;
3487 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
3488 }
3489
3490 SDValue N00 = N0.getOperand(0);
3491 SDValue N01 = N0.getOperand(1);
3492
3493 // Shift amount needs to be uimm5.
3494 if (N0.getMachineOpcode() == RISCV::SLLI &&
3495 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
3496 break;
3497
3498 SDNode *Result =
3499 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
3500 N00, N01);
3501 ReplaceUses(N, Result);
3502 return true;
3503 }
3504 case RISCV::ADDW:
3505 case RISCV::ADDIW:
3506 case RISCV::SUBW:
3507 case RISCV::MULW:
3508 case RISCV::SLLIW:
3509 case RISCV::PACKW:
3510 case RISCV::TH_MULAW:
3511 case RISCV::TH_MULAH:
3512 case RISCV::TH_MULSW:
3513 case RISCV::TH_MULSH:
3514 if (N0.getValueType() == MVT::i32)
3515 break;
3516
3517     // The result is already sign extended; just remove the sext.w.
3518 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
3519 ReplaceUses(N, N0.getNode());
3520 return true;
3521 }
3522
3523 return false;
3524 }
3525
3526 // After ISel, a vector pseudo's mask will be copied to V0 via a CopyToReg
3527 // that's glued to the pseudo. This tries to look up the value that was copied
3528 // to V0.
3529 static SDValue getMaskSetter(SDValue MaskOp, SDValue GlueOp) {
3530 // Check that we're using V0 as a mask register.
3531 if (!isa<RegisterSDNode>(MaskOp) ||
3532 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
3533 return SDValue();
3534
3535 // The glued user defines V0.
3536 const auto *Glued = GlueOp.getNode();
3537
3538 if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
3539 return SDValue();
3540
3541 // Check that we're defining V0 as a mask register.
3542 if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
3543 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
3544 return SDValue();
3545
3546 SDValue MaskSetter = Glued->getOperand(2);
3547
3548 // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
3549 // from an extract_subvector or insert_subvector.
3550 if (MaskSetter->isMachineOpcode() &&
3551 MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
3552 MaskSetter = MaskSetter->getOperand(0);
3553
3554 return MaskSetter;
3555 }
3556
3557 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
3558 // Check the instruction defining V0; it needs to be a VMSET pseudo.
3559 SDValue MaskSetter = getMaskSetter(MaskOp, GlueOp);
3560 if (!MaskSetter)
3561 return false;
3562
3563 const auto IsVMSet = [](unsigned Opc) {
3564 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
3565 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
3566 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
3567 Opc == RISCV::PseudoVMSET_M_B8;
3568 };
3569
3570 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
3571 // undefined behaviour if it's the wrong bitwidth, so we could choose to
3572 // assume that it's all-ones? Same applies to its VL.
3573 return MaskSetter->isMachineOpcode() &&
3574 IsVMSet(MaskSetter.getMachineOpcode());
3575 }
3576
3577 // Return true if we can prove that the mask of N is an all-ones mask.
3578 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
3579 return usesAllOnesMask(N->getOperand(MaskOpIdx),
3580 N->getOperand(N->getNumOperands() - 1));
3581 }
3582
3583 static bool isImplicitDef(SDValue V) {
3584 if (!V.isMachineOpcode())
3585 return false;
3586 if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
3587 for (unsigned I = 1; I < V.getNumOperands(); I += 2)
3588 if (!isImplicitDef(V.getOperand(I)))
3589 return false;
3590 return true;
3591 }
3592 return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
3593 }
3594
3595 // Optimize masked RVV pseudo instructions with a known all-ones mask to their
3596 // corresponding "unmasked" pseudo versions. The mask we're interested in will
3597 // take the form of a V0 physical register operand, with a glued
3598 // register-setting instruction.
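// Illustrative sketch (operand lists simplified): a node such as
//   %r = PseudoVADD_VV_M1_MASK %passthru, %a, %b, $v0, %vl, sew, policy
// whose V0 mask was set by a PseudoVMSET_M_* can be rewritten as
//   %r = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, sew, policy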
3599 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
3600 const RISCV::RISCVMaskedPseudoInfo *I =
3601 RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
3602 if (!I)
3603 return false;
3604
3605 unsigned MaskOpIdx = I->MaskOpIdx;
3606 if (!usesAllOnesMask(N, MaskOpIdx))
3607 return false;
3608
3609 // There are two classes of pseudos in the table - compares and
3610 // everything else. See the comment on RISCVMaskedPseudo for details.
3611 const unsigned Opc = I->UnmaskedPseudo;
3612 const MCInstrDesc &MCID = TII->get(Opc);
3613 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
3614 #ifndef NDEBUG
3615 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
3616 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
3617 RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
3618 "Masked and unmasked pseudos are inconsistent");
3619 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
3620 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
3621 #endif
3622
3623 SmallVector<SDValue, 8> Ops;
3624 // Skip the merge operand at index 0 if !UseTUPseudo.
3625 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
3626 // Skip the mask, and the Glue.
3627 SDValue Op = N->getOperand(I);
3628 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
3629 continue;
3630 Ops.push_back(Op);
3631 }
3632
3633 // Transitively apply any node glued to our new node.
3634 const auto *Glued = N->getGluedNode();
3635 if (auto *TGlued = Glued->getGluedNode())
3636 Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
3637
3638 MachineSDNode *Result =
3639 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3640
3641 if (!N->memoperands_empty())
3642 CurDAG->setNodeMemRefs(Result, N->memoperands());
3643
3644 Result->setFlags(N->getFlags());
3645 ReplaceUses(N, Result);
3646
3647 return true;
3648 }
3649
3650 static bool IsVMerge(SDNode *N) {
3651 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
3652 }
3653
3654 static bool IsVMv(SDNode *N) {
3655 return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
3656 }
3657
3658 static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
3659 switch (LMUL) {
3660 case RISCVII::LMUL_F8:
3661 return RISCV::PseudoVMSET_M_B1;
3662 case RISCVII::LMUL_F4:
3663 return RISCV::PseudoVMSET_M_B2;
3664 case RISCVII::LMUL_F2:
3665 return RISCV::PseudoVMSET_M_B4;
3666 case RISCVII::LMUL_1:
3667 return RISCV::PseudoVMSET_M_B8;
3668 case RISCVII::LMUL_2:
3669 return RISCV::PseudoVMSET_M_B16;
3670 case RISCVII::LMUL_4:
3671 return RISCV::PseudoVMSET_M_B32;
3672 case RISCVII::LMUL_8:
3673 return RISCV::PseudoVMSET_M_B64;
3674 case RISCVII::LMUL_RESERVED:
3675 llvm_unreachable("Unexpected LMUL");
3676 }
3677 llvm_unreachable("Unknown VLMUL enum");
3678 }
3679
3680 // Try to fold away VMERGE_VVM instructions into their true operands:
3681 //
3682 // %true = PseudoVADD_VV ...
3683 // %x = PseudoVMERGE_VVM %false, %false, %true, %mask
3684 // ->
3685 // %x = PseudoVADD_VV_MASK %false, ..., %mask
3686 //
3687 // We can only fold if vmerge's merge operand, vmerge's false operand and
3688 // %true's merge operand (if it has one) are the same. This is because we have
3689 // to consolidate them into one merge operand in the result.
3690 //
3691 // If %true is masked, then we can use its mask instead of vmerge's if vmerge's
3692 // mask is all ones.
3693 //
3694 // We can also fold a VMV_V_V into its true operand, since it is equivalent to a
3695 // VMERGE_VVM with an all ones mask.
3696 //
3697 // The resulting VL is the minimum of the two VLs.
3698 //
3699 // The resulting policy is the effective policy the vmerge would have had,
3700 // i.e. whether or not its merge operand was implicit-def.
3701 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
3702 SDValue Merge, False, True, VL, Mask, Glue;
3703 // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
3704 if (IsVMv(N)) {
3705 Merge = N->getOperand(0);
3706 False = N->getOperand(0);
3707 True = N->getOperand(1);
3708 VL = N->getOperand(2);
3709 // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones
3710 // mask later below.
3711 } else {
3712 assert(IsVMerge(N));
3713 Merge = N->getOperand(0);
3714 False = N->getOperand(1);
3715 True = N->getOperand(2);
3716 Mask = N->getOperand(3);
3717 VL = N->getOperand(4);
3718 // We always have a glue node for the mask at v0.
3719 Glue = N->getOperand(N->getNumOperands() - 1);
3720 }
3721 assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
3722 assert(!Glue || Glue.getValueType() == MVT::Glue);
3723
3724 // If the EEW of True is different from vmerge's SEW, then we can't fold.
3725 if (True.getSimpleValueType() != N->getSimpleValueType(0))
3726 return false;
3727
3728 // We require that either merge and false are the same, or that merge
3729 // is undefined.
3730 if (Merge != False && !isImplicitDef(Merge))
3731 return false;
3732
3733   assert(True.getResNo() == 0 &&
3734          "Expected True to be the first result of the instruction.");
3735
3736   // N must be the only user of True.
3737 if (!True.hasOneUse())
3738 return false;
3739
3740 if (!True.isMachineOpcode())
3741 return false;
3742
3743 unsigned TrueOpc = True.getMachineOpcode();
3744 const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
3745 uint64_t TrueTSFlags = TrueMCID.TSFlags;
3746 bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
3747
3748 bool IsMasked = false;
3749 const RISCV::RISCVMaskedPseudoInfo *Info =
3750 RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
3751 if (!Info && HasTiedDest) {
3752 Info = RISCV::getMaskedPseudoInfo(TrueOpc);
3753 IsMasked = true;
3754 }
3755 assert(!(IsMasked && !HasTiedDest) && "Expected tied dest");
3756
3757 if (!Info)
3758 return false;
3759
3760 // If True has a merge operand then it needs to be the same as vmerge's False,
3761 // since False will be used for the result's merge operand.
3762 if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
3763 SDValue MergeOpTrue = True->getOperand(0);
3764 if (False != MergeOpTrue)
3765 return false;
3766 }
3767
3768 // If True is masked then the vmerge must have either the same mask or an all
3769 // 1s mask, since we're going to keep the mask from True.
3770 if (IsMasked && Mask) {
3771 // FIXME: Support mask agnostic True instruction which would have an
3772 // undef merge operand.
3773 SDValue TrueMask =
3774 getMaskSetter(True->getOperand(Info->MaskOpIdx),
3775 True->getOperand(True->getNumOperands() - 1));
3776 assert(TrueMask);
3777 if (!usesAllOnesMask(Mask, Glue) && getMaskSetter(Mask, Glue) != TrueMask)
3778 return false;
3779 }
3780
3781 // Skip if True has side effect.
3782 if (TII->get(TrueOpc).hasUnmodeledSideEffects())
3783 return false;
3784
3785 // The last operand of a masked instruction may be glued.
3786 bool HasGlueOp = True->getGluedNode() != nullptr;
3787
3788 // The chain operand may exist either before the glued operands or in the last
3789 // position.
3790 unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
3791 bool HasChainOp =
3792 True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
3793
3794 if (HasChainOp) {
3795 // Avoid creating cycles in the DAG. We must ensure that none of the other
3796     // operands depend on True through its chain.
3797 SmallVector<const SDNode *, 4> LoopWorklist;
3798 SmallPtrSet<const SDNode *, 16> Visited;
3799 LoopWorklist.push_back(False.getNode());
3800 if (Mask)
3801 LoopWorklist.push_back(Mask.getNode());
3802 LoopWorklist.push_back(VL.getNode());
3803 if (Glue)
3804 LoopWorklist.push_back(Glue.getNode());
3805 if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
3806 return false;
3807 }
3808
3809 // The vector policy operand may be present for masked intrinsics
3810 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
3811 unsigned TrueVLIndex =
3812 True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
3813 SDValue TrueVL = True.getOperand(TrueVLIndex);
3814 SDValue SEW = True.getOperand(TrueVLIndex + 1);
3815
3816 auto GetMinVL = [](SDValue LHS, SDValue RHS) {
3817 if (LHS == RHS)
3818 return LHS;
3819 if (isAllOnesConstant(LHS))
3820 return RHS;
3821 if (isAllOnesConstant(RHS))
3822 return LHS;
3823 auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
3824 auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
3825 if (!CLHS || !CRHS)
3826 return SDValue();
3827 return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
3828 };
3829
3830 // Because N and True must have the same merge operand (or True's operand is
3831 // implicit_def), the "effective" body is the minimum of their VLs.
3832 SDValue OrigVL = VL;
3833 VL = GetMinVL(TrueVL, VL);
3834 if (!VL)
3835 return false;
3836
3837 // Some operations produce different elementwise results depending on the
3838 // active elements, like viota.m or vredsum. This transformation is illegal
3839 // for these if we change the active elements (i.e. mask or VL).
3840 if (Info->ActiveElementsAffectResult) {
3841 if (Mask && !usesAllOnesMask(Mask, Glue))
3842 return false;
3843 if (TrueVL != VL)
3844 return false;
3845 }
3846
3847 // If we end up changing the VL or mask of True, then we need to make sure it
3848 // doesn't raise any observable fp exceptions, since changing the active
3849 // elements will affect how fflags is set.
3850 if (TrueVL != VL || !IsMasked)
3851 if (mayRaiseFPException(True.getNode()) &&
3852 !True->getFlags().hasNoFPExcept())
3853 return false;
3854
3855 SDLoc DL(N);
3856
3857 // From the preconditions we checked above, we know the mask and thus glue
3858 // for the result node will be taken from True.
3859 if (IsMasked) {
3860 Mask = True->getOperand(Info->MaskOpIdx);
3861 Glue = True->getOperand(True->getNumOperands() - 1);
3862 assert(Glue.getValueType() == MVT::Glue);
3863 }
3864   // If we end up using the vmerge's mask but the vmerge is actually a
3865   // vmv.v.v (which has no mask), create an all-ones mask to use.
3866 else if (IsVMv(N)) {
3867 unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
3868 unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
3869 ElementCount EC = N->getValueType(0).getVectorElementCount();
3870 MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
3871
3872 SDValue AllOnesMask =
3873 SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
3874 SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
3875 RISCV::V0, AllOnesMask, SDValue());
3876 Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
3877 Glue = MaskCopy.getValue(1);
3878 }
3879
3880 unsigned MaskedOpc = Info->MaskedPseudo;
3881 #ifndef NDEBUG
3882 const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
3883 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
3884 "Expected instructions with mask have policy operand.");
3885 assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
3886 MCOI::TIED_TO) == 0 &&
3887 "Expected instructions with mask have a tied dest.");
3888 #endif
3889
3890 // Use a tumu policy, relaxing it to tail agnostic provided that the merge
3891 // operand is undefined.
3892 //
3893 // However, if the VL became smaller than what the vmerge had originally, then
3894 // elements past VL that were previously in the vmerge's body will have moved
3895 // to the tail. In that case we always need to use tail undisturbed to
3896 // preserve them.
3897 bool MergeVLShrunk = VL != OrigVL;
3898 uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
3899 ? RISCVII::TAIL_AGNOSTIC
3900 : /*TUMU*/ 0;
3901 SDValue PolicyOp =
3902 CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
3903
3904
3905 SmallVector<SDValue, 8> Ops;
3906 Ops.push_back(False);
3907
3908 const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
3909 const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
3910 assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
3911 Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
3912
3913 Ops.push_back(Mask);
3914
3915   // For an unmasked "VOp" with a rounding mode operand, the operand list is
3916   // (..., rm, vl) or (..., rm, vl, policy).
3917   // Its masked version is (..., vm, rm, vl, policy).
3918   // See the rounding mode pseudo nodes in RISCVInstrInfoVPseudos.td.
3919 if (HasRoundingMode)
3920 Ops.push_back(True->getOperand(TrueVLIndex - 1));
3921
3922 Ops.append({VL, SEW, PolicyOp});
3923
3924   // The result node should take the chain operand of True.
3925 if (HasChainOp)
3926 Ops.push_back(True.getOperand(TrueChainOpIdx));
3927
3928 // Add the glue for the CopyToReg of mask->v0.
3929 Ops.push_back(Glue);
3930
3931 MachineSDNode *Result =
3932 CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
3933 Result->setFlags(True->getFlags());
3934
3935 if (!cast<MachineSDNode>(True)->memoperands_empty())
3936 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
3937
3938 // Replace vmerge.vvm node by Result.
3939 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3940
3941   // Replace the other values of True, e.g. the chain and VL outputs.
3942 for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
3943 ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
3944
3945 return true;
3946 }
3947
3948 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
3949 bool MadeChange = false;
3950 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3951
3952 while (Position != CurDAG->allnodes_begin()) {
3953 SDNode *N = &*--Position;
3954 if (N->use_empty() || !N->isMachineOpcode())
3955 continue;
3956
3957 if (IsVMerge(N) || IsVMv(N))
3958 MadeChange |= performCombineVMergeAndVOps(N);
3959 }
3960 return MadeChange;
3961 }
3962
3963 /// If our passthru is an implicit_def, use noreg instead. This sidesteps
3964 /// issues with MachineCSE not being able to CSE expressions with
3965 /// IMPLICIT_DEF operands while preserving the semantic intent. See
3966 /// pr64282 for context. Note that this transform is the last one
3967 /// performed at ISEL DAG to DAG.
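/// Illustrative sketch: a pseudo whose tied passthru operand is an
/// IMPLICIT_DEF, e.g.
///   %p = IMPLICIT_DEF
///   %r = PseudoVADD_VV_M1 %p, %a, %b, %vl, sew, policy
/// is rebuilt with $noreg as the passthru so MachineCSE can match it against
/// an otherwise identical node.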
3968 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
3969 bool MadeChange = false;
3970 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
3971
3972 while (Position != CurDAG->allnodes_begin()) {
3973 SDNode *N = &*--Position;
3974 if (N->use_empty() || !N->isMachineOpcode())
3975 continue;
3976
3977 const unsigned Opc = N->getMachineOpcode();
3978 if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
3979 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
3980 !isImplicitDef(N->getOperand(0)))
3981 continue;
3982
3983 SmallVector<SDValue> Ops;
3984 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
3985 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
3986 SDValue Op = N->getOperand(I);
3987 Ops.push_back(Op);
3988 }
3989
3990 MachineSDNode *Result =
3991 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3992 Result->setFlags(N->getFlags());
3993 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
3994 ReplaceUses(N, Result);
3995 MadeChange = true;
3996 }
3997 return MadeChange;
3998 }
3999
4000
4001 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
4002 // for instruction scheduling.
4003 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
4004 CodeGenOptLevel OptLevel) {
4005 return new RISCVDAGToDAGISelLegacy(TM, OptLevel);
4006 }
4007
4008 char RISCVDAGToDAGISelLegacy::ID = 0;
4009
4010 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM,
4011 CodeGenOptLevel OptLevel)
4012 : SelectionDAGISelLegacy(
4013 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {}
4014
4015 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
4016