1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISC-V target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVBaseInfo.h" 15 #include "MCTargetDesc/RISCVMCTargetDesc.h" 16 #include "MCTargetDesc/RISCVMatInt.h" 17 #include "RISCVISelLowering.h" 18 #include "RISCVMachineFunctionInfo.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/IR/IntrinsicsRISCV.h" 21 #include "llvm/Support/Alignment.h" 22 #include "llvm/Support/Debug.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" 30 31 static cl::opt<bool> UsePseudoMovImm( 32 "riscv-use-rematerializable-movimm", cl::Hidden, 33 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " 34 "constant materialization"), 35 cl::init(false)); 36 37 namespace llvm::RISCV { 38 #define GET_RISCVVSSEGTable_IMPL 39 #define GET_RISCVVLSEGTable_IMPL 40 #define GET_RISCVVLXSEGTable_IMPL 41 #define GET_RISCVVSXSEGTable_IMPL 42 #define GET_RISCVVLETable_IMPL 43 #define GET_RISCVVSETable_IMPL 44 #define GET_RISCVVLXTable_IMPL 45 #define GET_RISCVVSXTable_IMPL 46 #define GET_RISCVMaskedPseudosTable_IMPL 47 #include "RISCVGenSearchableTables.inc" 48 } // namespace llvm::RISCV 49 50 void RISCVDAGToDAGISel::PreprocessISelDAG() { 51 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 52 53 bool MadeChange = false; 54 while (Position != CurDAG->allnodes_begin()) { 55 SDNode *N = &*--Position; 56 if (N->use_empty()) 57 continue; 58 59 SDValue Result; 60 switch (N->getOpcode()) { 61 case ISD::SPLAT_VECTOR: { 62 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 63 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 64 MVT VT = N->getSimpleValueType(0); 65 unsigned Opc = 66 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 67 SDLoc DL(N); 68 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 69 SDValue Src = N->getOperand(0); 70 if (VT.isInteger()) 71 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(), 72 N->getOperand(0)); 73 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL); 74 break; 75 } 76 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { 77 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 78 // load. Done after lowering and combining so that we have a chance to 79 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 
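      // Illustrative result of this expansion (RV32-style sequence, assuming
      // the scalars land in the stack temporary created below):
      //   sw lo, 0(slot); sw hi, 4(slot); vlse64.v vd, (slot), zero
      // The node's operands are (Passthru, Lo, Hi, VL).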
80 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 81 MVT VT = N->getSimpleValueType(0); 82 SDValue Passthru = N->getOperand(0); 83 SDValue Lo = N->getOperand(1); 84 SDValue Hi = N->getOperand(2); 85 SDValue VL = N->getOperand(3); 86 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 87 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 88 "Unexpected VTs!"); 89 MachineFunction &MF = CurDAG->getMachineFunction(); 90 SDLoc DL(N); 91 92 // Create temporary stack for each expanding node. 93 SDValue StackSlot = 94 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8)); 95 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex(); 96 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 97 98 SDValue Chain = CurDAG->getEntryNode(); 99 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 100 101 SDValue OffsetSlot = 102 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL); 103 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 104 Align(8)); 105 106 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 107 108 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 109 SDValue IntID = 110 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 111 SDValue Ops[] = {Chain, 112 IntID, 113 Passthru, 114 StackSlot, 115 CurDAG->getRegister(RISCV::X0, MVT::i64), 116 VL}; 117 118 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 119 MVT::i64, MPI, Align(8), 120 MachineMemOperand::MOLoad); 121 break; 122 } 123 } 124 125 if (Result) { 126 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: "); 127 LLVM_DEBUG(N->dump(CurDAG)); 128 LLVM_DEBUG(dbgs() << "\nNew: "); 129 LLVM_DEBUG(Result->dump(CurDAG)); 130 LLVM_DEBUG(dbgs() << "\n"); 131 132 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 133 MadeChange = true; 134 } 135 } 136 137 if (MadeChange) 138 CurDAG->RemoveDeadNodes(); 139 } 140 141 void RISCVDAGToDAGISel::PostprocessISelDAG() { 142 HandleSDNode Dummy(CurDAG->getRoot()); 143 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 144 145 bool MadeChange = false; 146 while (Position != CurDAG->allnodes_begin()) { 147 SDNode *N = &*--Position; 148 // Skip dead nodes and any non-machine opcodes. 149 if (N->use_empty() || !N->isMachineOpcode()) 150 continue; 151 152 MadeChange |= doPeepholeSExtW(N); 153 154 // FIXME: This is here only because the VMerge transform doesn't 155 // know how to handle masked true inputs. Once that has been moved 156 // to post-ISEL, this can be deleted as well. 157 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N)); 158 } 159 160 CurDAG->setRoot(Dummy.getValue()); 161 162 MadeChange |= doPeepholeMergeVVMFold(); 163 164 // After we're done with everything else, convert IMPLICIT_DEF 165 // passthru operands to NoRegister. This is required to workaround 166 // an optimization deficiency in MachineCSE. This really should 167 // be merged back into each of the patterns (i.e. there's no good 168 // reason not to go directly to NoReg), but is being done this way 169 // to allow easy backporting. 
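  // For example, a vector pseudo whose passthru operand is an IMPLICIT_DEF
  // ends up with NoRegister ($noreg) as that operand after this peephole,
  // which helps MachineCSE treat otherwise-identical instructions as equal.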
170 MadeChange |= doPeepholeNoRegPassThru(); 171 172 if (MadeChange) 173 CurDAG->RemoveDeadNodes(); 174 } 175 176 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 177 RISCVMatInt::InstSeq &Seq) { 178 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 179 for (const RISCVMatInt::Inst &Inst : Seq) { 180 SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT); 181 SDNode *Result = nullptr; 182 switch (Inst.getOpndKind()) { 183 case RISCVMatInt::Imm: 184 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm); 185 break; 186 case RISCVMatInt::RegX0: 187 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, 188 CurDAG->getRegister(RISCV::X0, VT)); 189 break; 190 case RISCVMatInt::RegReg: 191 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg); 192 break; 193 case RISCVMatInt::RegImm: 194 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm); 195 break; 196 } 197 198 // Only the first instruction has X0 as its source. 199 SrcReg = SDValue(Result, 0); 200 } 201 202 return SrcReg; 203 } 204 205 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 206 int64_t Imm, const RISCVSubtarget &Subtarget) { 207 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 208 209 // Use a rematerializable pseudo instruction for short sequences if enabled. 210 if (Seq.size() == 2 && UsePseudoMovImm) 211 return SDValue( 212 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT, 213 CurDAG->getTargetConstant(Imm, DL, VT)), 214 0); 215 216 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at 217 // worst an LUI+ADDIW. This will require an extra register, but avoids a 218 // constant pool. 219 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 220 // low and high 32 bits are the same and bit 31 and 63 are set. 221 if (Seq.size() > 3) { 222 unsigned ShiftAmt, AddOpc; 223 RISCVMatInt::InstSeq SeqLo = 224 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 225 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { 226 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); 227 228 SDValue SLLI = SDValue( 229 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, 230 CurDAG->getTargetConstant(ShiftAmt, DL, VT)), 231 0); 232 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); 233 } 234 } 235 236 // Otherwise, use the original sequence. 
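  // (Illustration of the two-register path above: on RV64 a constant such as
  // 0x0000123400001234 can be built as X = lui+addiw to make 0x1234, followed
  // by add X, X, (slli X, 32), instead of a longer single-register chain.)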
237 return selectImmSeq(CurDAG, DL, VT, Seq); 238 } 239 240 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 241 unsigned NF, RISCVII::VLMUL LMUL) { 242 static const unsigned M1TupleRegClassIDs[] = { 243 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 244 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 245 RISCV::VRN8M1RegClassID}; 246 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, 247 RISCV::VRN3M2RegClassID, 248 RISCV::VRN4M2RegClassID}; 249 250 assert(Regs.size() >= 2 && Regs.size() <= 8); 251 252 unsigned RegClassID; 253 unsigned SubReg0; 254 switch (LMUL) { 255 default: 256 llvm_unreachable("Invalid LMUL."); 257 case RISCVII::VLMUL::LMUL_F8: 258 case RISCVII::VLMUL::LMUL_F4: 259 case RISCVII::VLMUL::LMUL_F2: 260 case RISCVII::VLMUL::LMUL_1: 261 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 262 "Unexpected subreg numbering"); 263 SubReg0 = RISCV::sub_vrm1_0; 264 RegClassID = M1TupleRegClassIDs[NF - 2]; 265 break; 266 case RISCVII::VLMUL::LMUL_2: 267 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 268 "Unexpected subreg numbering"); 269 SubReg0 = RISCV::sub_vrm2_0; 270 RegClassID = M2TupleRegClassIDs[NF - 2]; 271 break; 272 case RISCVII::VLMUL::LMUL_4: 273 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 274 "Unexpected subreg numbering"); 275 SubReg0 = RISCV::sub_vrm4_0; 276 RegClassID = RISCV::VRN2M4RegClassID; 277 break; 278 } 279 280 SDLoc DL(Regs[0]); 281 SmallVector<SDValue, 8> Ops; 282 283 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 284 285 for (unsigned I = 0; I < Regs.size(); ++I) { 286 Ops.push_back(Regs[I]); 287 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 288 } 289 SDNode *N = 290 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 291 return SDValue(N, 0); 292 } 293 294 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 295 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 296 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 297 bool IsLoad, MVT *IndexVT) { 298 SDValue Chain = Node->getOperand(0); 299 SDValue Glue; 300 301 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 302 303 if (IsStridedOrIndexed) { 304 Operands.push_back(Node->getOperand(CurOp++)); // Index. 305 if (IndexVT) 306 *IndexVT = Operands.back()->getSimpleValueType(0); 307 } 308 309 if (IsMasked) { 310 // Mask needs to be copied to V0. 311 SDValue Mask = Node->getOperand(CurOp++); 312 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 313 Glue = Chain.getValue(1); 314 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 315 } 316 SDValue VL; 317 selectVLOp(Node->getOperand(CurOp++), VL); 318 Operands.push_back(VL); 319 320 MVT XLenVT = Subtarget->getXLenVT(); 321 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 322 Operands.push_back(SEWOp); 323 324 // At the IR layer, all the masked load intrinsics have policy operands, 325 // none of the others do. All have passthru operands. For our pseudos, 326 // all loads have policy operands. 327 if (IsLoad) { 328 uint64_t Policy = RISCVII::MASK_AGNOSTIC; 329 if (IsMasked) 330 Policy = Node->getConstantOperandVal(CurOp++); 331 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 332 Operands.push_back(PolicyOp); 333 } 334 335 Operands.push_back(Chain); // Chain. 
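  // Operands appended by this helper, in order: base, [stride or index],
  // [mask (V0)], VL, SEW, [policy (loads only)], chain, [glue when a mask
  // copy was emitted].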
336 if (Glue) 337 Operands.push_back(Glue); 338 } 339 340 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 341 bool IsStrided) { 342 SDLoc DL(Node); 343 unsigned NF = Node->getNumValues() - 1; 344 MVT VT = Node->getSimpleValueType(0); 345 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 346 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 347 348 unsigned CurOp = 2; 349 SmallVector<SDValue, 8> Operands; 350 351 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 352 Node->op_begin() + CurOp + NF); 353 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); 354 Operands.push_back(Merge); 355 CurOp += NF; 356 357 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 358 Operands, /*IsLoad=*/true); 359 360 const RISCV::VLSEGPseudo *P = 361 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 362 static_cast<unsigned>(LMUL)); 363 MachineSDNode *Load = 364 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 365 366 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 367 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 368 369 SDValue SuperReg = SDValue(Load, 0); 370 for (unsigned I = 0; I < NF; ++I) { 371 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 372 ReplaceUses(SDValue(Node, I), 373 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 374 } 375 376 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 377 CurDAG->RemoveDeadNode(Node); 378 } 379 380 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 381 SDLoc DL(Node); 382 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 383 MVT VT = Node->getSimpleValueType(0); 384 MVT XLenVT = Subtarget->getXLenVT(); 385 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 386 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 387 388 unsigned CurOp = 2; 389 SmallVector<SDValue, 7> Operands; 390 391 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 392 Node->op_begin() + CurOp + NF); 393 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 394 Operands.push_back(MaskedOff); 395 CurOp += NF; 396 397 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 398 /*IsStridedOrIndexed*/ false, Operands, 399 /*IsLoad=*/true); 400 401 const RISCV::VLSEGPseudo *P = 402 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 403 Log2SEW, static_cast<unsigned>(LMUL)); 404 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 405 XLenVT, MVT::Other, Operands); 406 407 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 408 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 409 410 SDValue SuperReg = SDValue(Load, 0); 411 for (unsigned I = 0; I < NF; ++I) { 412 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 413 ReplaceUses(SDValue(Node, I), 414 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 415 } 416 417 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL 418 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain 419 CurDAG->RemoveDeadNode(Node); 420 } 421 422 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 423 bool IsOrdered) { 424 SDLoc DL(Node); 425 unsigned NF = Node->getNumValues() - 1; 426 MVT VT = Node->getSimpleValueType(0); 427 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 428 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 429 430 unsigned CurOp = 2; 431 SmallVector<SDValue, 8> Operands; 432 433 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 434 Node->op_begin() 
+ CurOp + NF); 435 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 436 Operands.push_back(MaskedOff); 437 CurOp += NF; 438 439 MVT IndexVT; 440 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 441 /*IsStridedOrIndexed*/ true, Operands, 442 /*IsLoad=*/true, &IndexVT); 443 444 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 445 "Element count mismatch"); 446 447 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 448 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 449 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 450 report_fatal_error("The V extension does not support EEW=64 for index " 451 "values when XLEN=32"); 452 } 453 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 454 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 455 static_cast<unsigned>(IndexLMUL)); 456 MachineSDNode *Load = 457 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 458 459 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 460 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 461 462 SDValue SuperReg = SDValue(Load, 0); 463 for (unsigned I = 0; I < NF; ++I) { 464 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 465 ReplaceUses(SDValue(Node, I), 466 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 467 } 468 469 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 470 CurDAG->RemoveDeadNode(Node); 471 } 472 473 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 474 bool IsStrided) { 475 SDLoc DL(Node); 476 unsigned NF = Node->getNumOperands() - 4; 477 if (IsStrided) 478 NF--; 479 if (IsMasked) 480 NF--; 481 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 482 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 483 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 484 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 485 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 486 487 SmallVector<SDValue, 8> Operands; 488 Operands.push_back(StoreVal); 489 unsigned CurOp = 2 + NF; 490 491 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 492 Operands); 493 494 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 495 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 496 MachineSDNode *Store = 497 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 498 499 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 500 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 501 502 ReplaceNode(Node, Store); 503 } 504 505 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 506 bool IsOrdered) { 507 SDLoc DL(Node); 508 unsigned NF = Node->getNumOperands() - 5; 509 if (IsMasked) 510 --NF; 511 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 512 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 513 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 514 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 515 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 516 517 SmallVector<SDValue, 8> Operands; 518 Operands.push_back(StoreVal); 519 unsigned CurOp = 2 + NF; 520 521 MVT IndexVT; 522 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 523 /*IsStridedOrIndexed*/ true, Operands, 524 /*IsLoad=*/false, &IndexVT); 525 526 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 527 "Element count mismatch"); 528 529 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 530 
unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 531 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 532 report_fatal_error("The V extension does not support EEW=64 for index " 533 "values when XLEN=32"); 534 } 535 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 536 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 537 static_cast<unsigned>(IndexLMUL)); 538 MachineSDNode *Store = 539 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 540 541 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 542 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 543 544 ReplaceNode(Node, Store); 545 } 546 547 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 548 if (!Subtarget->hasVInstructions()) 549 return; 550 551 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode"); 552 553 SDLoc DL(Node); 554 MVT XLenVT = Subtarget->getXLenVT(); 555 556 unsigned IntNo = Node->getConstantOperandVal(0); 557 558 assert((IntNo == Intrinsic::riscv_vsetvli || 559 IntNo == Intrinsic::riscv_vsetvlimax) && 560 "Unexpected vsetvli intrinsic"); 561 562 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; 563 unsigned Offset = (VLMax ? 1 : 2); 564 565 assert(Node->getNumOperands() == Offset + 2 && 566 "Unexpected number of operands"); 567 568 unsigned SEW = 569 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 570 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 571 Node->getConstantOperandVal(Offset + 1) & 0x7); 572 573 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 574 /*MaskAgnostic*/ true); 575 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 576 577 SDValue VLOperand; 578 unsigned Opcode = RISCV::PseudoVSETVLI; 579 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { 580 const unsigned VLEN = Subtarget->getRealMinVLen(); 581 if (VLEN == Subtarget->getRealMaxVLen()) 582 if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) 583 VLMax = true; 584 } 585 if (VLMax || isAllOnesConstant(Node->getOperand(1))) { 586 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 587 Opcode = RISCV::PseudoVSETVLIX0; 588 } else { 589 VLOperand = Node->getOperand(1); 590 591 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 592 uint64_t AVL = C->getZExtValue(); 593 if (isUInt<5>(AVL)) { 594 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 595 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, 596 XLenVT, VLImm, VTypeIOp)); 597 return; 598 } 599 } 600 } 601 602 ReplaceNode(Node, 603 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp)); 604 } 605 606 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { 607 MVT VT = Node->getSimpleValueType(0); 608 unsigned Opcode = Node->getOpcode(); 609 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) && 610 "Unexpected opcode"); 611 SDLoc DL(Node); 612 613 // For operations of the form (x << C1) op C2, check if we can use 614 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1. 615 SDValue N0 = Node->getOperand(0); 616 SDValue N1 = Node->getOperand(1); 617 618 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 619 if (!Cst) 620 return false; 621 622 int64_t Val = Cst->getSExtValue(); 623 624 // Check if immediate can already use ANDI/ORI/XORI. 625 if (isInt<12>(Val)) 626 return false; 627 628 SDValue Shift = N0; 629 630 // If Val is simm32 and we have a sext_inreg from i32, then the binop 631 // produces at least 33 sign bits. 
We can peek through the sext_inreg and use 632 // a SLLIW at the end. 633 bool SignExt = false; 634 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 635 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) { 636 SignExt = true; 637 Shift = N0.getOperand(0); 638 } 639 640 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) 641 return false; 642 643 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 644 if (!ShlCst) 645 return false; 646 647 uint64_t ShAmt = ShlCst->getZExtValue(); 648 649 // Make sure that we don't change the operation by removing bits. 650 // This only matters for OR and XOR, AND is unaffected. 651 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt); 652 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 653 return false; 654 655 int64_t ShiftedVal = Val >> ShAmt; 656 if (!isInt<12>(ShiftedVal)) 657 return false; 658 659 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW. 660 if (SignExt && ShAmt >= 32) 661 return false; 662 663 // Ok, we can reorder to get a smaller immediate. 664 unsigned BinOpc; 665 switch (Opcode) { 666 default: llvm_unreachable("Unexpected opcode"); 667 case ISD::AND: BinOpc = RISCV::ANDI; break; 668 case ISD::OR: BinOpc = RISCV::ORI; break; 669 case ISD::XOR: BinOpc = RISCV::XORI; break; 670 } 671 672 unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI; 673 674 SDNode *BinOp = 675 CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0), 676 CurDAG->getTargetConstant(ShiftedVal, DL, VT)); 677 SDNode *SLLI = 678 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0), 679 CurDAG->getTargetConstant(ShAmt, DL, VT)); 680 ReplaceNode(Node, SLLI); 681 return true; 682 } 683 684 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { 685 // Only supported with XTHeadBb at the moment. 686 if (!Subtarget->hasVendorXTHeadBb()) 687 return false; 688 689 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 690 if (!N1C) 691 return false; 692 693 SDValue N0 = Node->getOperand(0); 694 if (!N0.hasOneUse()) 695 return false; 696 697 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL, 698 MVT VT) { 699 return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0), 700 CurDAG->getTargetConstant(Msb, DL, VT), 701 CurDAG->getTargetConstant(Lsb, DL, VT)); 702 }; 703 704 SDLoc DL(Node); 705 MVT VT = Node->getSimpleValueType(0); 706 const unsigned RightShAmt = N1C->getZExtValue(); 707 708 // Transform (sra (shl X, C1) C2) with C1 < C2 709 // -> (TH.EXT X, msb, lsb) 710 if (N0.getOpcode() == ISD::SHL) { 711 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 712 if (!N01C) 713 return false; 714 715 const unsigned LeftShAmt = N01C->getZExtValue(); 716 // Make sure that this is a bitfield extraction (i.e., the shift-right 717 // amount can not be less than the left-shift). 718 if (LeftShAmt > RightShAmt) 719 return false; 720 721 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; 722 const unsigned Msb = MsbPlusOne - 1; 723 const unsigned Lsb = RightShAmt - LeftShAmt; 724 725 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); 726 ReplaceNode(Node, TH_EXT); 727 return true; 728 } 729 730 // Transform (sra (sext_inreg X, _), C) -> 731 // (TH.EXT X, msb, lsb) 732 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { 733 unsigned ExtSize = 734 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 735 736 // ExtSize of 32 should use sraiw via tablegen pattern. 
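    // For the remaining sizes, e.g. (sra (sext_inreg X, i16), 3) becomes
    // th.ext X, 15, 3.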
737 if (ExtSize == 32) 738 return false; 739 740 const unsigned Msb = ExtSize - 1; 741 const unsigned Lsb = RightShAmt; 742 743 SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); 744 ReplaceNode(Node, TH_EXT); 745 return true; 746 } 747 748 return false; 749 } 750 751 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { 752 // Target does not support indexed loads. 753 if (!Subtarget->hasVendorXTHeadMemIdx()) 754 return false; 755 756 LoadSDNode *Ld = cast<LoadSDNode>(Node); 757 ISD::MemIndexedMode AM = Ld->getAddressingMode(); 758 if (AM == ISD::UNINDEXED) 759 return false; 760 761 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset()); 762 if (!C) 763 return false; 764 765 EVT LoadVT = Ld->getMemoryVT(); 766 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) && 767 "Unexpected addressing mode"); 768 bool IsPre = AM == ISD::PRE_INC; 769 bool IsPost = AM == ISD::POST_INC; 770 int64_t Offset = C->getSExtValue(); 771 772 // The constants that can be encoded in the THeadMemIdx instructions 773 // are of the form (sign_extend(imm5) << imm2). 774 int64_t Shift; 775 for (Shift = 0; Shift < 4; Shift++) 776 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) 777 break; 778 779 // Constant cannot be encoded. 780 if (Shift == 4) 781 return false; 782 783 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); 784 unsigned Opcode; 785 if (LoadVT == MVT::i8 && IsPre) 786 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB; 787 else if (LoadVT == MVT::i8 && IsPost) 788 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; 789 else if (LoadVT == MVT::i16 && IsPre) 790 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; 791 else if (LoadVT == MVT::i16 && IsPost) 792 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; 793 else if (LoadVT == MVT::i32 && IsPre) 794 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; 795 else if (LoadVT == MVT::i32 && IsPost) 796 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; 797 else if (LoadVT == MVT::i64 && IsPre) 798 Opcode = RISCV::TH_LDIB; 799 else if (LoadVT == MVT::i64 && IsPost) 800 Opcode = RISCV::TH_LDIA; 801 else 802 return false; 803 804 EVT Ty = Ld->getOffset().getValueType(); 805 SDValue Ops[] = {Ld->getBasePtr(), 806 CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty), 807 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), 808 Ld->getChain()}; 809 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0), 810 Ld->getValueType(1), MVT::Other, Ops); 811 812 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand(); 813 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp}); 814 815 ReplaceNode(Node, New); 816 817 return true; 818 } 819 820 void RISCVDAGToDAGISel::Select(SDNode *Node) { 821 // If we have a custom node, we have already selected. 822 if (Node->isMachineOpcode()) { 823 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 824 Node->setNodeId(-1); 825 return; 826 } 827 828 // Instruction Selection not handled by the auto-generated tablegen selection 829 // should be handled here. 
830 unsigned Opcode = Node->getOpcode(); 831 MVT XLenVT = Subtarget->getXLenVT(); 832 SDLoc DL(Node); 833 MVT VT = Node->getSimpleValueType(0); 834 835 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); 836 837 switch (Opcode) { 838 case ISD::Constant: { 839 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT"); 840 auto *ConstNode = cast<ConstantSDNode>(Node); 841 if (ConstNode->isZero()) { 842 SDValue New = 843 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 844 ReplaceNode(Node, New.getNode()); 845 return; 846 } 847 int64_t Imm = ConstNode->getSExtValue(); 848 // If the upper XLen-16 bits are not used, try to convert this to a simm12 849 // by sign extending bit 15. 850 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 851 hasAllHUsers(Node)) 852 Imm = SignExtend64<16>(Imm); 853 // If the upper 32-bits are not used try to convert this into a simm32 by 854 // sign extending bit 32. 855 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 856 Imm = SignExtend64<32>(Imm); 857 858 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode()); 859 return; 860 } 861 case ISD::ConstantFP: { 862 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF(); 863 auto [FPImm, NeedsFNeg] = 864 static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF, 865 VT); 866 if (FPImm >= 0) { 867 unsigned Opc; 868 unsigned FNegOpc; 869 switch (VT.SimpleTy) { 870 default: 871 llvm_unreachable("Unexpected size"); 872 case MVT::f16: 873 Opc = RISCV::FLI_H; 874 FNegOpc = RISCV::FSGNJN_H; 875 break; 876 case MVT::f32: 877 Opc = RISCV::FLI_S; 878 FNegOpc = RISCV::FSGNJN_S; 879 break; 880 case MVT::f64: 881 Opc = RISCV::FLI_D; 882 FNegOpc = RISCV::FSGNJN_D; 883 break; 884 } 885 SDNode *Res = CurDAG->getMachineNode( 886 Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT)); 887 if (NeedsFNeg) 888 Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0), 889 SDValue(Res, 0)); 890 891 ReplaceNode(Node, Res); 892 return; 893 } 894 895 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; 896 SDValue Imm; 897 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will 898 // create an integer immediate. 899 if (APF.isPosZero() || NegZeroF64) 900 Imm = CurDAG->getRegister(RISCV::X0, XLenVT); 901 else 902 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), 903 *Subtarget); 904 905 bool HasZdinx = Subtarget->hasStdExtZdinx(); 906 bool Is64Bit = Subtarget->is64Bit(); 907 unsigned Opc; 908 switch (VT.SimpleTy) { 909 default: 910 llvm_unreachable("Unexpected size"); 911 case MVT::bf16: 912 assert(Subtarget->hasStdExtZfbfmin()); 913 Opc = RISCV::FMV_H_X; 914 break; 915 case MVT::f16: 916 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; 917 break; 918 case MVT::f32: 919 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X; 920 break; 921 case MVT::f64: 922 // For RV32, we can't move from a GPR, we need to convert instead. This 923 // should only happen for +0.0 and -0.0. 924 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant"); 925 if (Is64Bit) 926 Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X; 927 else 928 Opc = HasZdinx ? 
RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W; 929 break; 930 } 931 932 SDNode *Res; 933 if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) 934 Res = CurDAG->getMachineNode( 935 Opc, DL, VT, Imm, 936 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); 937 else 938 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm); 939 940 // For f64 -0.0, we need to insert a fneg.d idiom. 941 if (NegZeroF64) { 942 Opc = RISCV::FSGNJN_D; 943 if (HasZdinx) 944 Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; 945 Res = 946 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0)); 947 } 948 949 ReplaceNode(Node, Res); 950 return; 951 } 952 case RISCVISD::SplitF64: { 953 if (!Subtarget->hasStdExtZfa()) 954 break; 955 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() && 956 "Unexpected subtarget"); 957 958 // With Zfa, lower to fmv.x.w and fmvh.x.d. 959 if (!SDValue(Node, 0).use_empty()) { 960 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT, 961 Node->getOperand(0)); 962 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0)); 963 } 964 if (!SDValue(Node, 1).use_empty()) { 965 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT, 966 Node->getOperand(0)); 967 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0)); 968 } 969 970 CurDAG->RemoveDeadNode(Node); 971 return; 972 } 973 case ISD::SHL: { 974 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 975 if (!N1C) 976 break; 977 SDValue N0 = Node->getOperand(0); 978 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 979 !isa<ConstantSDNode>(N0.getOperand(1))) 980 break; 981 unsigned ShAmt = N1C->getZExtValue(); 982 uint64_t Mask = N0.getConstantOperandVal(1); 983 984 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has 985 // 32 leading zeros and C3 trailing zeros. 986 if (ShAmt <= 32 && isShiftedMask_64(Mask)) { 987 unsigned XLen = Subtarget->getXLen(); 988 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 989 unsigned TrailingZeros = llvm::countr_zero(Mask); 990 if (TrailingZeros > 0 && LeadingZeros == 32) { 991 SDNode *SRLIW = CurDAG->getMachineNode( 992 RISCV::SRLIW, DL, VT, N0->getOperand(0), 993 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 994 SDNode *SLLI = CurDAG->getMachineNode( 995 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 996 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 997 ReplaceNode(Node, SLLI); 998 return; 999 } 1000 } 1001 break; 1002 } 1003 case ISD::SRL: { 1004 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1005 if (!N1C) 1006 break; 1007 SDValue N0 = Node->getOperand(0); 1008 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1009 break; 1010 unsigned ShAmt = N1C->getZExtValue(); 1011 uint64_t Mask = N0.getConstantOperandVal(1); 1012 1013 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 1014 // 32 leading zeros and C3 trailing zeros. 
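    // For example, with Mask = 0xffff0000 and C = 4 this becomes
    // (slli (srliw X, 16), 12).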
1015 if (isShiftedMask_64(Mask) && N0.hasOneUse()) { 1016 unsigned XLen = Subtarget->getXLen(); 1017 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 1018 unsigned TrailingZeros = llvm::countr_zero(Mask); 1019 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 1020 SDNode *SRLIW = CurDAG->getMachineNode( 1021 RISCV::SRLIW, DL, VT, N0->getOperand(0), 1022 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 1023 SDNode *SLLI = CurDAG->getMachineNode( 1024 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1025 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 1026 ReplaceNode(Node, SLLI); 1027 return; 1028 } 1029 } 1030 1031 // Optimize (srl (and X, C2), C) -> 1032 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 1033 // Where C2 is a mask with C3 trailing ones. 1034 // Taking into account that the C2 may have had lower bits unset by 1035 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 1036 // This pattern occurs when type legalizing right shifts for types with 1037 // less than XLen bits. 1038 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 1039 if (!isMask_64(Mask)) 1040 break; 1041 unsigned TrailingOnes = llvm::countr_one(Mask); 1042 if (ShAmt >= TrailingOnes) 1043 break; 1044 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64. 1045 if (TrailingOnes == 32) { 1046 SDNode *SRLI = CurDAG->getMachineNode( 1047 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT, 1048 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1049 ReplaceNode(Node, SRLI); 1050 return; 1051 } 1052 1053 // Only do the remaining transforms if the AND has one use. 1054 if (!N0.hasOneUse()) 1055 break; 1056 1057 // If C2 is (1 << ShAmt) use bexti or th.tst if possible. 1058 if (HasBitTest && ShAmt + 1 == TrailingOnes) { 1059 SDNode *BEXTI = CurDAG->getMachineNode( 1060 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT, 1061 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1062 ReplaceNode(Node, BEXTI); 1063 return; 1064 } 1065 1066 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 1067 SDNode *SLLI = 1068 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1069 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1070 SDNode *SRLI = CurDAG->getMachineNode( 1071 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1072 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1073 ReplaceNode(Node, SRLI); 1074 return; 1075 } 1076 case ISD::SRA: { 1077 if (trySignedBitfieldExtract(Node)) 1078 return; 1079 1080 // Optimize (sra (sext_inreg X, i16), C) -> 1081 // (srai (slli X, (XLen-16), (XLen-16) + C) 1082 // And (sra (sext_inreg X, i8), C) -> 1083 // (srai (slli X, (XLen-8), (XLen-8) + C) 1084 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 1085 // This transform matches the code we get without Zbb. The shifts are more 1086 // compressible, and this can help expose CSE opportunities in the sdiv by 1087 // constant optimization. 1088 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1089 if (!N1C) 1090 break; 1091 SDValue N0 = Node->getOperand(0); 1092 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 1093 break; 1094 unsigned ShAmt = N1C->getZExtValue(); 1095 unsigned ExtSize = 1096 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 1097 // ExtSize of 32 should use sraiw via tablegen pattern. 
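    // For the smaller sizes, e.g. on RV64 (sra (sext_inreg X, i8), 2) becomes
    // (srai (slli X, 56), 58).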
1098 if (ExtSize >= 32 || ShAmt >= ExtSize) 1099 break; 1100 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 1101 SDNode *SLLI = 1102 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1103 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1104 SDNode *SRAI = CurDAG->getMachineNode( 1105 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 1106 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1107 ReplaceNode(Node, SRAI); 1108 return; 1109 } 1110 case ISD::OR: 1111 case ISD::XOR: 1112 if (tryShrinkShlLogicImm(Node)) 1113 return; 1114 1115 break; 1116 case ISD::AND: { 1117 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1118 if (!N1C) 1119 break; 1120 uint64_t C1 = N1C->getZExtValue(); 1121 const bool isC1Mask = isMask_64(C1); 1122 const bool isC1ANDI = isInt<12>(C1); 1123 1124 SDValue N0 = Node->getOperand(0); 1125 1126 auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT, 1127 SDValue X, unsigned Msb, 1128 unsigned Lsb) { 1129 if (!Subtarget->hasVendorXTHeadBb()) 1130 return false; 1131 1132 SDNode *TH_EXTU = CurDAG->getMachineNode( 1133 RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT), 1134 CurDAG->getTargetConstant(Lsb, DL, VT)); 1135 ReplaceNode(Node, TH_EXTU); 1136 return true; 1137 }; 1138 1139 bool LeftShift = N0.getOpcode() == ISD::SHL; 1140 if (LeftShift || N0.getOpcode() == ISD::SRL) { 1141 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 1142 if (!C) 1143 break; 1144 unsigned C2 = C->getZExtValue(); 1145 unsigned XLen = Subtarget->getXLen(); 1146 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 1147 1148 // Keep track of whether this is a c.andi. If we can't use c.andi, the 1149 // shift pair might offer more compression opportunities. 1150 // TODO: We could check for C extension here, but we don't have many lit 1151 // tests with the C extension enabled so not checking gets better 1152 // coverage. 1153 // TODO: What if ANDI faster than shift? 1154 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 1155 1156 // Clear irrelevant bits in the mask. 1157 if (LeftShift) 1158 C1 &= maskTrailingZeros<uint64_t>(C2); 1159 else 1160 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 1161 1162 // Some transforms should only be done if the shift has a single use or 1163 // the AND would become (srli (slli X, 32), 32) 1164 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 1165 1166 SDValue X = N0.getOperand(0); 1167 1168 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 1169 // with c3 leading zeros. 1170 if (!LeftShift && isC1Mask) { 1171 unsigned Leading = XLen - llvm::bit_width(C1); 1172 if (C2 < Leading) { 1173 // If the number of leading zeros is C2+32 this can be SRLIW. 1174 if (C2 + 32 == Leading) { 1175 SDNode *SRLIW = CurDAG->getMachineNode( 1176 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 1177 ReplaceNode(Node, SRLIW); 1178 return; 1179 } 1180 1181 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) 1182 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 1183 // 1184 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 1185 // legalized and goes through DAG combine. 
1186 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 1187 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1188 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 1189 SDNode *SRAIW = 1190 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 1191 CurDAG->getTargetConstant(31, DL, VT)); 1192 SDNode *SRLIW = CurDAG->getMachineNode( 1193 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 1194 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 1195 ReplaceNode(Node, SRLIW); 1196 return; 1197 } 1198 1199 // Try to use an unsigned bitfield extract (e.g., th.extu) if 1200 // available. 1201 // Transform (and (srl x, C2), C1) 1202 // -> (<bfextract> x, msb, lsb) 1203 // 1204 // Make sure to keep this below the SRLIW cases, as we always want to 1205 // prefer the more common instruction. 1206 const unsigned Msb = llvm::bit_width(C1) + C2 - 1; 1207 const unsigned Lsb = C2; 1208 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) 1209 return; 1210 1211 // (srli (slli x, c3-c2), c3). 1212 // Skip if we could use (zext.w (sraiw X, C2)). 1213 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 1214 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1215 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 1216 // Also Skip if we can use bexti or th.tst. 1217 Skip |= HasBitTest && Leading == XLen - 1; 1218 if (OneUseOrZExtW && !Skip) { 1219 SDNode *SLLI = CurDAG->getMachineNode( 1220 RISCV::SLLI, DL, VT, X, 1221 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 1222 SDNode *SRLI = CurDAG->getMachineNode( 1223 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1224 CurDAG->getTargetConstant(Leading, DL, VT)); 1225 ReplaceNode(Node, SRLI); 1226 return; 1227 } 1228 } 1229 } 1230 1231 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 1232 // shifted by c2 bits with c3 leading zeros. 1233 if (LeftShift && isShiftedMask_64(C1)) { 1234 unsigned Leading = XLen - llvm::bit_width(C1); 1235 1236 if (C2 + Leading < XLen && 1237 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 1238 // Use slli.uw when possible. 1239 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 1240 SDNode *SLLI_UW = 1241 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X, 1242 CurDAG->getTargetConstant(C2, DL, VT)); 1243 ReplaceNode(Node, SLLI_UW); 1244 return; 1245 } 1246 1247 // (srli (slli c2+c3), c3) 1248 if (OneUseOrZExtW && !IsCANDI) { 1249 SDNode *SLLI = CurDAG->getMachineNode( 1250 RISCV::SLLI, DL, VT, X, 1251 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 1252 SDNode *SRLI = CurDAG->getMachineNode( 1253 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1254 CurDAG->getTargetConstant(Leading, DL, VT)); 1255 ReplaceNode(Node, SRLI); 1256 return; 1257 } 1258 } 1259 } 1260 1261 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 1262 // shifted mask with c2 leading zeros and c3 trailing zeros. 1263 if (!LeftShift && isShiftedMask_64(C1)) { 1264 unsigned Leading = XLen - llvm::bit_width(C1); 1265 unsigned Trailing = llvm::countr_zero(C1); 1266 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && 1267 !IsCANDI) { 1268 unsigned SrliOpc = RISCV::SRLI; 1269 // If the input is zexti32 we should use SRLIW. 
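        // "zexti32" here means an AND with the constant 0xffffffff; the check
        // below looks through it and switches to SRLIW.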
1270 if (X.getOpcode() == ISD::AND && 1271 isa<ConstantSDNode>(X.getOperand(1)) && 1272 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 1273 SrliOpc = RISCV::SRLIW; 1274 X = X.getOperand(0); 1275 } 1276 SDNode *SRLI = CurDAG->getMachineNode( 1277 SrliOpc, DL, VT, X, 1278 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1279 SDNode *SLLI = CurDAG->getMachineNode( 1280 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1281 CurDAG->getTargetConstant(Trailing, DL, VT)); 1282 ReplaceNode(Node, SLLI); 1283 return; 1284 } 1285 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 1286 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 1287 OneUseOrZExtW && !IsCANDI) { 1288 SDNode *SRLIW = CurDAG->getMachineNode( 1289 RISCV::SRLIW, DL, VT, X, 1290 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1291 SDNode *SLLI = CurDAG->getMachineNode( 1292 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1293 CurDAG->getTargetConstant(Trailing, DL, VT)); 1294 ReplaceNode(Node, SLLI); 1295 return; 1296 } 1297 } 1298 1299 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 1300 // shifted mask with no leading zeros and c3 trailing zeros. 1301 if (LeftShift && isShiftedMask_64(C1)) { 1302 unsigned Leading = XLen - llvm::bit_width(C1); 1303 unsigned Trailing = llvm::countr_zero(C1); 1304 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 1305 SDNode *SRLI = CurDAG->getMachineNode( 1306 RISCV::SRLI, DL, VT, X, 1307 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1308 SDNode *SLLI = CurDAG->getMachineNode( 1309 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1310 CurDAG->getTargetConstant(Trailing, DL, VT)); 1311 ReplaceNode(Node, SLLI); 1312 return; 1313 } 1314 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 1315 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 1316 SDNode *SRLIW = CurDAG->getMachineNode( 1317 RISCV::SRLIW, DL, VT, X, 1318 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1319 SDNode *SLLI = CurDAG->getMachineNode( 1320 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1321 CurDAG->getTargetConstant(Trailing, DL, VT)); 1322 ReplaceNode(Node, SLLI); 1323 return; 1324 } 1325 } 1326 } 1327 1328 // If C1 masks off the upper bits only (but can't be formed as an 1329 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if 1330 // available. 1331 // Transform (and x, C1) 1332 // -> (<bfextract> x, msb, lsb) 1333 if (isC1Mask && !isC1ANDI) { 1334 const unsigned Msb = llvm::bit_width(C1) - 1; 1335 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) 1336 return; 1337 } 1338 1339 if (tryShrinkShlLogicImm(Node)) 1340 return; 1341 1342 break; 1343 } 1344 case ISD::MUL: { 1345 // Special case for calculating (mul (and X, C2), C1) where the full product 1346 // fits in XLen bits. We can shift X left by the number of leading zeros in 1347 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1348 // product has XLen trailing zeros, putting it in the output of MULHU. This 1349 // can avoid materializing a constant in a register for C2. 1350 1351 // RHS should be a constant. 1352 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1353 if (!N1C || !N1C->hasOneUse()) 1354 break; 1355 1356 // LHS should be an AND with constant. 1357 SDValue N0 = Node->getOperand(0); 1358 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1359 break; 1360 1361 uint64_t C2 = N0.getConstantOperandVal(1); 1362 1363 // Constant should be a mask. 
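    // Illustrative end result, assuming the checks below all pass: on RV64,
    // (mul (and X, 0xffffff), 100) selects to
    // (mulhu (slli X, 40), 100 << 24).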
1364 if (!isMask_64(C2)) 1365 break; 1366 1367 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has 1368 // multiple users or the constant is a simm12. This prevents inserting a 1369 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely 1370 // make it more costly to materialize. Otherwise, using a SLLI might allow 1371 // it to be compressed. 1372 bool IsANDIOrZExt = 1373 isInt<12>(C2) || 1374 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); 1375 // With XTHeadBb, we can use TH.EXTU. 1376 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); 1377 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) 1378 break; 1379 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or 1380 // the constant is a simm32. 1381 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); 1382 // With XTHeadBb, we can use TH.EXTU. 1383 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); 1384 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse())) 1385 break; 1386 1387 // We need to shift left the AND input and C1 by a total of XLen bits. 1388 1389 // How far left do we need to shift the AND input? 1390 unsigned XLen = Subtarget->getXLen(); 1391 unsigned LeadingZeros = XLen - llvm::bit_width(C2); 1392 1393 // The constant gets shifted by the remaining amount unless that would 1394 // shift bits out. 1395 uint64_t C1 = N1C->getZExtValue(); 1396 unsigned ConstantShift = XLen - LeadingZeros; 1397 if (ConstantShift > (XLen - llvm::bit_width(C1))) 1398 break; 1399 1400 uint64_t ShiftedC1 = C1 << ConstantShift; 1401 // If this RV32, we need to sign extend the constant. 1402 if (XLen == 32) 1403 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1404 1405 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 1406 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode(); 1407 SDNode *SLLI = 1408 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1409 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1410 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1411 SDValue(SLLI, 0), SDValue(Imm, 0)); 1412 ReplaceNode(Node, MULHU); 1413 return; 1414 } 1415 case ISD::LOAD: { 1416 if (tryIndexedLoad(Node)) 1417 return; 1418 break; 1419 } 1420 case ISD::INTRINSIC_WO_CHAIN: { 1421 unsigned IntNo = Node->getConstantOperandVal(0); 1422 switch (IntNo) { 1423 // By default we do not custom select any intrinsic. 1424 default: 1425 break; 1426 case Intrinsic::riscv_vmsgeu: 1427 case Intrinsic::riscv_vmsge: { 1428 SDValue Src1 = Node->getOperand(1); 1429 SDValue Src2 = Node->getOperand(2); 1430 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1431 bool IsCmpUnsignedZero = false; 1432 // Only custom select scalar second operand. 1433 if (Src2.getValueType() != XLenVT) 1434 break; 1435 // Small constants are handled with patterns. 1436 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1437 int64_t CVal = C->getSExtValue(); 1438 if (CVal >= -15 && CVal <= 16) { 1439 if (!IsUnsigned || CVal != 0) 1440 break; 1441 IsCmpUnsignedZero = true; 1442 } 1443 } 1444 MVT Src1VT = Src1.getSimpleValueType(); 1445 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1446 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1447 default: 1448 llvm_unreachable("Unexpected LMUL!"); 1449 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1450 case RISCVII::VLMUL::lmulenum: \ 1451 VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \ 1452 : RISCV::PseudoVMSLT_VX_##suffix; \ 1453 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1454 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1455 break; 1456 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1457 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1458 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1459 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1460 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1461 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1462 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1463 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1464 } 1465 SDValue SEW = CurDAG->getTargetConstant( 1466 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1467 SDValue VL; 1468 selectVLOp(Node->getOperand(3), VL); 1469 1470 // If vmsgeu with 0 immediate, expand it to vmset. 1471 if (IsCmpUnsignedZero) { 1472 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1473 return; 1474 } 1475 1476 // Expand to 1477 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1478 SDValue Cmp = SDValue( 1479 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1480 0); 1481 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1482 {Cmp, Cmp, VL, SEW})); 1483 return; 1484 } 1485 case Intrinsic::riscv_vmsgeu_mask: 1486 case Intrinsic::riscv_vmsge_mask: { 1487 SDValue Src1 = Node->getOperand(2); 1488 SDValue Src2 = Node->getOperand(3); 1489 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1490 bool IsCmpUnsignedZero = false; 1491 // Only custom select scalar second operand. 1492 if (Src2.getValueType() != XLenVT) 1493 break; 1494 // Small constants are handled with patterns. 1495 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1496 int64_t CVal = C->getSExtValue(); 1497 if (CVal >= -15 && CVal <= 16) { 1498 if (!IsUnsigned || CVal != 0) 1499 break; 1500 IsCmpUnsignedZero = true; 1501 } 1502 } 1503 MVT Src1VT = Src1.getSimpleValueType(); 1504 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1505 VMOROpcode; 1506 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1507 default: 1508 llvm_unreachable("Unexpected LMUL!"); 1509 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1510 case RISCVII::VLMUL::lmulenum: \ 1511 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1512 : RISCV::PseudoVMSLT_VX_##suffix; \ 1513 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1514 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1515 break; 1516 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1517 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1518 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1519 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1520 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1521 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1522 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1523 #undef CASE_VMSLT_OPCODES 1524 } 1525 // Mask operations use the LMUL from the mask type. 
1526 switch (RISCVTargetLowering::getLMUL(VT)) { 1527 default: 1528 llvm_unreachable("Unexpected LMUL!"); 1529 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1530 case RISCVII::VLMUL::lmulenum: \ 1531 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1532 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1533 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1534 break; 1535 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1536 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1537 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1538 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1539 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1540 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1541 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1542 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1543 } 1544 SDValue SEW = CurDAG->getTargetConstant( 1545 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1546 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1547 SDValue VL; 1548 selectVLOp(Node->getOperand(5), VL); 1549 SDValue MaskedOff = Node->getOperand(1); 1550 SDValue Mask = Node->getOperand(4); 1551 1552 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1553 if (IsCmpUnsignedZero) { 1554 // We don't need vmor if the MaskedOff and the Mask are the same 1555 // value. 1556 if (Mask == MaskedOff) { 1557 ReplaceUses(Node, Mask.getNode()); 1558 return; 1559 } 1560 ReplaceNode(Node, 1561 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1562 {Mask, MaskedOff, VL, MaskSEW})); 1563 return; 1564 } 1565 1566 // If the MaskedOff value and the Mask are the same value use 1567 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1568 // This avoids needing to copy v0 to vd before starting the next sequence. 1569 if (Mask == MaskedOff) { 1570 SDValue Cmp = SDValue( 1571 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1572 0); 1573 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1574 {Mask, Cmp, VL, MaskSEW})); 1575 return; 1576 } 1577 1578 // Mask needs to be copied to V0. 1579 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1580 RISCV::V0, Mask, SDValue()); 1581 SDValue Glue = Chain.getValue(1); 1582 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1583 1584 // Otherwise use 1585 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1586 // The result is mask undisturbed. 1587 // We use the same instructions to emulate mask agnostic behavior, because 1588 // the agnostic result can be either undisturbed or all 1. 1589 SDValue Cmp = SDValue( 1590 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1591 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1592 0); 1593 // vmxor.mm vd, vd, v0 is used to update active value. 1594 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1595 {Cmp, Mask, VL, MaskSEW})); 1596 return; 1597 } 1598 case Intrinsic::riscv_vsetvli: 1599 case Intrinsic::riscv_vsetvlimax: 1600 return selectVSETVLI(Node); 1601 } 1602 break; 1603 } 1604 case ISD::INTRINSIC_W_CHAIN: { 1605 unsigned IntNo = Node->getConstantOperandVal(1); 1606 switch (IntNo) { 1607 // By default we do not custom select any intrinsic. 
1608 default: 1609 break; 1610 case Intrinsic::riscv_vlseg2: 1611 case Intrinsic::riscv_vlseg3: 1612 case Intrinsic::riscv_vlseg4: 1613 case Intrinsic::riscv_vlseg5: 1614 case Intrinsic::riscv_vlseg6: 1615 case Intrinsic::riscv_vlseg7: 1616 case Intrinsic::riscv_vlseg8: { 1617 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1618 return; 1619 } 1620 case Intrinsic::riscv_vlseg2_mask: 1621 case Intrinsic::riscv_vlseg3_mask: 1622 case Intrinsic::riscv_vlseg4_mask: 1623 case Intrinsic::riscv_vlseg5_mask: 1624 case Intrinsic::riscv_vlseg6_mask: 1625 case Intrinsic::riscv_vlseg7_mask: 1626 case Intrinsic::riscv_vlseg8_mask: { 1627 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1628 return; 1629 } 1630 case Intrinsic::riscv_vlsseg2: 1631 case Intrinsic::riscv_vlsseg3: 1632 case Intrinsic::riscv_vlsseg4: 1633 case Intrinsic::riscv_vlsseg5: 1634 case Intrinsic::riscv_vlsseg6: 1635 case Intrinsic::riscv_vlsseg7: 1636 case Intrinsic::riscv_vlsseg8: { 1637 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1638 return; 1639 } 1640 case Intrinsic::riscv_vlsseg2_mask: 1641 case Intrinsic::riscv_vlsseg3_mask: 1642 case Intrinsic::riscv_vlsseg4_mask: 1643 case Intrinsic::riscv_vlsseg5_mask: 1644 case Intrinsic::riscv_vlsseg6_mask: 1645 case Intrinsic::riscv_vlsseg7_mask: 1646 case Intrinsic::riscv_vlsseg8_mask: { 1647 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1648 return; 1649 } 1650 case Intrinsic::riscv_vloxseg2: 1651 case Intrinsic::riscv_vloxseg3: 1652 case Intrinsic::riscv_vloxseg4: 1653 case Intrinsic::riscv_vloxseg5: 1654 case Intrinsic::riscv_vloxseg6: 1655 case Intrinsic::riscv_vloxseg7: 1656 case Intrinsic::riscv_vloxseg8: 1657 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1658 return; 1659 case Intrinsic::riscv_vluxseg2: 1660 case Intrinsic::riscv_vluxseg3: 1661 case Intrinsic::riscv_vluxseg4: 1662 case Intrinsic::riscv_vluxseg5: 1663 case Intrinsic::riscv_vluxseg6: 1664 case Intrinsic::riscv_vluxseg7: 1665 case Intrinsic::riscv_vluxseg8: 1666 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1667 return; 1668 case Intrinsic::riscv_vloxseg2_mask: 1669 case Intrinsic::riscv_vloxseg3_mask: 1670 case Intrinsic::riscv_vloxseg4_mask: 1671 case Intrinsic::riscv_vloxseg5_mask: 1672 case Intrinsic::riscv_vloxseg6_mask: 1673 case Intrinsic::riscv_vloxseg7_mask: 1674 case Intrinsic::riscv_vloxseg8_mask: 1675 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1676 return; 1677 case Intrinsic::riscv_vluxseg2_mask: 1678 case Intrinsic::riscv_vluxseg3_mask: 1679 case Intrinsic::riscv_vluxseg4_mask: 1680 case Intrinsic::riscv_vluxseg5_mask: 1681 case Intrinsic::riscv_vluxseg6_mask: 1682 case Intrinsic::riscv_vluxseg7_mask: 1683 case Intrinsic::riscv_vluxseg8_mask: 1684 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1685 return; 1686 case Intrinsic::riscv_vlseg8ff: 1687 case Intrinsic::riscv_vlseg7ff: 1688 case Intrinsic::riscv_vlseg6ff: 1689 case Intrinsic::riscv_vlseg5ff: 1690 case Intrinsic::riscv_vlseg4ff: 1691 case Intrinsic::riscv_vlseg3ff: 1692 case Intrinsic::riscv_vlseg2ff: { 1693 selectVLSEGFF(Node, /*IsMasked*/ false); 1694 return; 1695 } 1696 case Intrinsic::riscv_vlseg8ff_mask: 1697 case Intrinsic::riscv_vlseg7ff_mask: 1698 case Intrinsic::riscv_vlseg6ff_mask: 1699 case Intrinsic::riscv_vlseg5ff_mask: 1700 case Intrinsic::riscv_vlseg4ff_mask: 1701 case Intrinsic::riscv_vlseg3ff_mask: 1702 case Intrinsic::riscv_vlseg2ff_mask: { 1703 selectVLSEGFF(Node, /*IsMasked*/ true); 1704 return; 1705 } 1706 case 
Intrinsic::riscv_vloxei:
1707     case Intrinsic::riscv_vloxei_mask:
1708     case Intrinsic::riscv_vluxei:
1709     case Intrinsic::riscv_vluxei_mask: {
1710       bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
1711                       IntNo == Intrinsic::riscv_vluxei_mask;
1712       bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
1713                        IntNo == Intrinsic::riscv_vloxei_mask;
1714 
1715       MVT VT = Node->getSimpleValueType(0);
1716       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1717 
1718       unsigned CurOp = 2;
1719       SmallVector<SDValue, 8> Operands;
1720       Operands.push_back(Node->getOperand(CurOp++));
1721 
1722       MVT IndexVT;
1723       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
1724                                  /*IsStridedOrIndexed*/ true, Operands,
1725                                  /*IsLoad=*/true, &IndexVT);
1726 
1727       assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
1728              "Element count mismatch");
1729 
1730       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
1731       RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
1732       unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
1733       if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
1734         report_fatal_error("The V extension does not support EEW=64 for index "
1735                            "values when XLEN=32");
1736       }
1737       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
1738           IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
1739           static_cast<unsigned>(IndexLMUL));
1740       MachineSDNode *Load =
1741           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
1742 
1743       if (auto *MemOp = dyn_cast<MemSDNode>(Node))
1744         CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
1745 
1746       ReplaceNode(Node, Load);
1747       return;
1748     }
1749     case Intrinsic::riscv_vlm:
1750     case Intrinsic::riscv_vle:
1751     case Intrinsic::riscv_vle_mask:
1752     case Intrinsic::riscv_vlse:
1753     case Intrinsic::riscv_vlse_mask: {
1754       bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
1755                       IntNo == Intrinsic::riscv_vlse_mask;
1756       bool IsStrided =
1757           IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;
1758 
1759       MVT VT = Node->getSimpleValueType(0);
1760       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
1761 
1762       // The riscv_vlm intrinsic is always tail agnostic and has no passthru
1763       // operand at the IR level. The pseudos, however, have both a policy and
1764       // a passthru operand. The passthru operand is needed to track the
1765       // "tail undefined" state, and the policy is there just for
1766       // consistency - it will always be "don't care" for the
1767       // unmasked form.
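      // Rough sketch of the resulting operand lists (illustrative only; the
      // exact order comes from the pseudo definitions):
      //   IR:     riscv.vlm(ptr, vl)
      //   Pseudo: PseudoVLM_V_*  implicit_def(passthru), ptr, vl, sew[, policy]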
1768 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1769 unsigned CurOp = 2; 1770 SmallVector<SDValue, 8> Operands; 1771 if (HasPassthruOperand) 1772 Operands.push_back(Node->getOperand(CurOp++)); 1773 else { 1774 // We eagerly lower to implicit_def (instead of undef), as we 1775 // otherwise fail to select nodes such as: nxv1i1 = undef 1776 SDNode *Passthru = 1777 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 1778 Operands.push_back(SDValue(Passthru, 0)); 1779 } 1780 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1781 Operands, /*IsLoad=*/true); 1782 1783 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1784 const RISCV::VLEPseudo *P = 1785 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 1786 static_cast<unsigned>(LMUL)); 1787 MachineSDNode *Load = 1788 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1789 1790 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1791 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1792 1793 ReplaceNode(Node, Load); 1794 return; 1795 } 1796 case Intrinsic::riscv_vleff: 1797 case Intrinsic::riscv_vleff_mask: { 1798 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1799 1800 MVT VT = Node->getSimpleValueType(0); 1801 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1802 1803 unsigned CurOp = 2; 1804 SmallVector<SDValue, 7> Operands; 1805 Operands.push_back(Node->getOperand(CurOp++)); 1806 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1807 /*IsStridedOrIndexed*/ false, Operands, 1808 /*IsLoad=*/true); 1809 1810 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1811 const RISCV::VLEPseudo *P = 1812 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, 1813 Log2SEW, static_cast<unsigned>(LMUL)); 1814 MachineSDNode *Load = CurDAG->getMachineNode( 1815 P->Pseudo, DL, Node->getVTList(), Operands); 1816 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1817 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1818 1819 ReplaceNode(Node, Load); 1820 return; 1821 } 1822 } 1823 break; 1824 } 1825 case ISD::INTRINSIC_VOID: { 1826 unsigned IntNo = Node->getConstantOperandVal(1); 1827 switch (IntNo) { 1828 case Intrinsic::riscv_vsseg2: 1829 case Intrinsic::riscv_vsseg3: 1830 case Intrinsic::riscv_vsseg4: 1831 case Intrinsic::riscv_vsseg5: 1832 case Intrinsic::riscv_vsseg6: 1833 case Intrinsic::riscv_vsseg7: 1834 case Intrinsic::riscv_vsseg8: { 1835 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1836 return; 1837 } 1838 case Intrinsic::riscv_vsseg2_mask: 1839 case Intrinsic::riscv_vsseg3_mask: 1840 case Intrinsic::riscv_vsseg4_mask: 1841 case Intrinsic::riscv_vsseg5_mask: 1842 case Intrinsic::riscv_vsseg6_mask: 1843 case Intrinsic::riscv_vsseg7_mask: 1844 case Intrinsic::riscv_vsseg8_mask: { 1845 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1846 return; 1847 } 1848 case Intrinsic::riscv_vssseg2: 1849 case Intrinsic::riscv_vssseg3: 1850 case Intrinsic::riscv_vssseg4: 1851 case Intrinsic::riscv_vssseg5: 1852 case Intrinsic::riscv_vssseg6: 1853 case Intrinsic::riscv_vssseg7: 1854 case Intrinsic::riscv_vssseg8: { 1855 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1856 return; 1857 } 1858 case Intrinsic::riscv_vssseg2_mask: 1859 case Intrinsic::riscv_vssseg3_mask: 1860 case Intrinsic::riscv_vssseg4_mask: 1861 case Intrinsic::riscv_vssseg5_mask: 1862 case Intrinsic::riscv_vssseg6_mask: 1863 case Intrinsic::riscv_vssseg7_mask: 1864 case Intrinsic::riscv_vssseg8_mask: { 1865 selectVSSEG(Node, /*IsMasked*/ true, 
/*IsStrided*/ true); 1866 return; 1867 } 1868 case Intrinsic::riscv_vsoxseg2: 1869 case Intrinsic::riscv_vsoxseg3: 1870 case Intrinsic::riscv_vsoxseg4: 1871 case Intrinsic::riscv_vsoxseg5: 1872 case Intrinsic::riscv_vsoxseg6: 1873 case Intrinsic::riscv_vsoxseg7: 1874 case Intrinsic::riscv_vsoxseg8: 1875 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1876 return; 1877 case Intrinsic::riscv_vsuxseg2: 1878 case Intrinsic::riscv_vsuxseg3: 1879 case Intrinsic::riscv_vsuxseg4: 1880 case Intrinsic::riscv_vsuxseg5: 1881 case Intrinsic::riscv_vsuxseg6: 1882 case Intrinsic::riscv_vsuxseg7: 1883 case Intrinsic::riscv_vsuxseg8: 1884 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1885 return; 1886 case Intrinsic::riscv_vsoxseg2_mask: 1887 case Intrinsic::riscv_vsoxseg3_mask: 1888 case Intrinsic::riscv_vsoxseg4_mask: 1889 case Intrinsic::riscv_vsoxseg5_mask: 1890 case Intrinsic::riscv_vsoxseg6_mask: 1891 case Intrinsic::riscv_vsoxseg7_mask: 1892 case Intrinsic::riscv_vsoxseg8_mask: 1893 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1894 return; 1895 case Intrinsic::riscv_vsuxseg2_mask: 1896 case Intrinsic::riscv_vsuxseg3_mask: 1897 case Intrinsic::riscv_vsuxseg4_mask: 1898 case Intrinsic::riscv_vsuxseg5_mask: 1899 case Intrinsic::riscv_vsuxseg6_mask: 1900 case Intrinsic::riscv_vsuxseg7_mask: 1901 case Intrinsic::riscv_vsuxseg8_mask: 1902 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1903 return; 1904 case Intrinsic::riscv_vsoxei: 1905 case Intrinsic::riscv_vsoxei_mask: 1906 case Intrinsic::riscv_vsuxei: 1907 case Intrinsic::riscv_vsuxei_mask: { 1908 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1909 IntNo == Intrinsic::riscv_vsuxei_mask; 1910 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1911 IntNo == Intrinsic::riscv_vsoxei_mask; 1912 1913 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1914 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1915 1916 unsigned CurOp = 2; 1917 SmallVector<SDValue, 8> Operands; 1918 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1919 1920 MVT IndexVT; 1921 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1922 /*IsStridedOrIndexed*/ true, Operands, 1923 /*IsLoad=*/false, &IndexVT); 1924 1925 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1926 "Element count mismatch"); 1927 1928 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1929 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1930 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1931 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1932 report_fatal_error("The V extension does not support EEW=64 for index " 1933 "values when XLEN=32"); 1934 } 1935 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1936 IsMasked, IsOrdered, IndexLog2EEW, 1937 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1938 MachineSDNode *Store = 1939 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1940 1941 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1942 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1943 1944 ReplaceNode(Node, Store); 1945 return; 1946 } 1947 case Intrinsic::riscv_vsm: 1948 case Intrinsic::riscv_vse: 1949 case Intrinsic::riscv_vse_mask: 1950 case Intrinsic::riscv_vsse: 1951 case Intrinsic::riscv_vsse_mask: { 1952 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1953 IntNo == Intrinsic::riscv_vsse_mask; 1954 bool IsStrided = 1955 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1956 1957 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1958 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1959 1960 unsigned CurOp = 2; 1961 SmallVector<SDValue, 8> Operands; 1962 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1963 1964 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1965 Operands); 1966 1967 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1968 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1969 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1970 MachineSDNode *Store = 1971 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1972 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1973 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1974 1975 ReplaceNode(Node, Store); 1976 return; 1977 } 1978 } 1979 break; 1980 } 1981 case ISD::BITCAST: { 1982 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1983 // Just drop bitcasts between vectors if both are fixed or both are 1984 // scalable. 1985 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1986 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1987 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1988 CurDAG->RemoveDeadNode(Node); 1989 return; 1990 } 1991 break; 1992 } 1993 case ISD::INSERT_SUBVECTOR: { 1994 SDValue V = Node->getOperand(0); 1995 SDValue SubV = Node->getOperand(1); 1996 SDLoc DL(SubV); 1997 auto Idx = Node->getConstantOperandVal(2); 1998 MVT SubVecVT = SubV.getSimpleValueType(); 1999 2000 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2001 MVT SubVecContainerVT = SubVecVT; 2002 // Establish the correct scalable-vector types for any fixed-length type. 
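    // (For example, with the default 128-bit minimum VLEN a fixed v4i32
    // subvector is typically given an nxv2i32 container; illustrative only,
    // the exact container depends on the subtarget.)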
2003 if (SubVecVT.isFixedLengthVector()) 2004 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 2005 if (VT.isFixedLengthVector()) 2006 VT = TLI.getContainerForFixedLengthVector(VT); 2007 2008 const auto *TRI = Subtarget->getRegisterInfo(); 2009 unsigned SubRegIdx; 2010 std::tie(SubRegIdx, Idx) = 2011 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2012 VT, SubVecContainerVT, Idx, TRI); 2013 2014 // If the Idx hasn't been completely eliminated then this is a subvector 2015 // insert which doesn't naturally align to a vector register. These must 2016 // be handled using instructions to manipulate the vector registers. 2017 if (Idx != 0) 2018 break; 2019 2020 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 2021 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 2022 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 2023 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 2024 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 2025 assert((!IsSubVecPartReg || V.isUndef()) && 2026 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 2027 "the subvector is smaller than a full-sized register"); 2028 2029 // If we haven't set a SubRegIdx, then we must be going between 2030 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 2031 if (SubRegIdx == RISCV::NoSubRegister) { 2032 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 2033 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2034 InRegClassID && 2035 "Unexpected subvector extraction"); 2036 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2037 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2038 DL, VT, SubV, RC); 2039 ReplaceNode(Node, NewNode); 2040 return; 2041 } 2042 2043 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 2044 ReplaceNode(Node, Insert.getNode()); 2045 return; 2046 } 2047 case ISD::EXTRACT_SUBVECTOR: { 2048 SDValue V = Node->getOperand(0); 2049 auto Idx = Node->getConstantOperandVal(1); 2050 MVT InVT = V.getSimpleValueType(); 2051 SDLoc DL(V); 2052 2053 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2054 MVT SubVecContainerVT = VT; 2055 // Establish the correct scalable-vector types for any fixed-length type. 2056 if (VT.isFixedLengthVector()) 2057 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 2058 if (InVT.isFixedLengthVector()) 2059 InVT = TLI.getContainerForFixedLengthVector(InVT); 2060 2061 const auto *TRI = Subtarget->getRegisterInfo(); 2062 unsigned SubRegIdx; 2063 std::tie(SubRegIdx, Idx) = 2064 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2065 InVT, SubVecContainerVT, Idx, TRI); 2066 2067 // If the Idx hasn't been completely eliminated then this is a subvector 2068 // extract which doesn't naturally align to a vector register. These must 2069 // be handled using instructions to manipulate the vector registers. 2070 if (Idx != 0) 2071 break; 2072 2073 // If we haven't set a SubRegIdx, then we must be going between 2074 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
2075 if (SubRegIdx == RISCV::NoSubRegister) { 2076 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 2077 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2078 InRegClassID && 2079 "Unexpected subvector extraction"); 2080 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2081 SDNode *NewNode = 2082 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 2083 ReplaceNode(Node, NewNode); 2084 return; 2085 } 2086 2087 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 2088 ReplaceNode(Node, Extract.getNode()); 2089 return; 2090 } 2091 case RISCVISD::VMV_S_X_VL: 2092 case RISCVISD::VFMV_S_F_VL: 2093 case RISCVISD::VMV_V_X_VL: 2094 case RISCVISD::VFMV_V_F_VL: { 2095 // Try to match splat of a scalar load to a strided load with stride of x0. 2096 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 2097 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 2098 if (!Node->getOperand(0).isUndef()) 2099 break; 2100 SDValue Src = Node->getOperand(1); 2101 auto *Ld = dyn_cast<LoadSDNode>(Src); 2102 // Can't fold load update node because the second 2103 // output is used so that load update node can't be removed. 2104 if (!Ld || Ld->isIndexed()) 2105 break; 2106 EVT MemVT = Ld->getMemoryVT(); 2107 // The memory VT should be the same size as the element type. 2108 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 2109 break; 2110 if (!IsProfitableToFold(Src, Node, Node) || 2111 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 2112 break; 2113 2114 SDValue VL; 2115 if (IsScalarMove) { 2116 // We could deal with more VL if we update the VSETVLI insert pass to 2117 // avoid introducing more VSETVLI. 2118 if (!isOneConstant(Node->getOperand(2))) 2119 break; 2120 selectVLOp(Node->getOperand(2), VL); 2121 } else 2122 selectVLOp(Node->getOperand(2), VL); 2123 2124 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2125 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 2126 2127 // If VL=1, then we don't need to do a strided load and can just do a 2128 // regular load. 2129 bool IsStrided = !isOneConstant(VL); 2130 2131 // Only do a strided load if we have optimized zero-stride vector load. 2132 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) 2133 break; 2134 2135 SmallVector<SDValue> Operands = { 2136 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0), 2137 Ld->getBasePtr()}; 2138 if (IsStrided) 2139 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); 2140 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; 2141 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 2142 Operands.append({VL, SEW, PolicyOp, Ld->getChain()}); 2143 2144 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2145 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 2146 /*IsMasked*/ false, IsStrided, /*FF*/ false, 2147 Log2SEW, static_cast<unsigned>(LMUL)); 2148 MachineSDNode *Load = 2149 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); 2150 // Update the chain. 2151 ReplaceUses(Src.getValue(1), SDValue(Load, 1)); 2152 // Record the mem-refs 2153 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 2154 // Replace the splat with the vlse. 
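    // Illustrative result (assumed registers): a splat of a scalar load such
    // as (vmv.v.x (lw a0)) becomes "vlse32.v vd, (a0), zero", or a plain
    // "vle32.v vd, (a0)" when VL is known to be 1.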
2155 ReplaceNode(Node, Load); 2156 return; 2157 } 2158 case ISD::PREFETCH: 2159 unsigned Locality = Node->getConstantOperandVal(3); 2160 if (Locality > 2) 2161 break; 2162 2163 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) { 2164 MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); 2165 MMO->setFlags(MachineMemOperand::MONonTemporal); 2166 2167 int NontemporalLevel = 0; 2168 switch (Locality) { 2169 case 0: 2170 NontemporalLevel = 3; // NTL.ALL 2171 break; 2172 case 1: 2173 NontemporalLevel = 1; // NTL.PALL 2174 break; 2175 case 2: 2176 NontemporalLevel = 0; // NTL.P1 2177 break; 2178 default: 2179 llvm_unreachable("unexpected locality value."); 2180 } 2181 2182 if (NontemporalLevel & 0b1) 2183 MMO->setFlags(MONontemporalBit0); 2184 if (NontemporalLevel & 0b10) 2185 MMO->setFlags(MONontemporalBit1); 2186 } 2187 break; 2188 } 2189 2190 // Select the default instruction. 2191 SelectCode(Node); 2192 } 2193 2194 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 2195 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 2196 std::vector<SDValue> &OutOps) { 2197 // Always produce a register and immediate operand, as expected by 2198 // RISCVAsmPrinter::PrintAsmMemoryOperand. 2199 switch (ConstraintID) { 2200 case InlineAsm::ConstraintCode::o: 2201 case InlineAsm::ConstraintCode::m: { 2202 SDValue Op0, Op1; 2203 bool Found = SelectAddrRegImm(Op, Op0, Op1); 2204 assert(Found && "SelectAddrRegImm should always succeed"); 2205 (void)Found; 2206 OutOps.push_back(Op0); 2207 OutOps.push_back(Op1); 2208 return false; 2209 } 2210 case InlineAsm::ConstraintCode::A: 2211 OutOps.push_back(Op); 2212 OutOps.push_back( 2213 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); 2214 return false; 2215 default: 2216 report_fatal_error("Unexpected asm memory constraint " + 2217 InlineAsm::getMemConstraintName(ConstraintID)); 2218 } 2219 2220 return true; 2221 } 2222 2223 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 2224 SDValue &Offset) { 2225 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 2226 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 2227 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 2228 return true; 2229 } 2230 2231 return false; 2232 } 2233 2234 // Select a frame index and an optional immediate offset from an ADD or OR. 2235 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 2236 SDValue &Offset) { 2237 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2238 return true; 2239 2240 if (!CurDAG->isBaseWithConstantOffset(Addr)) 2241 return false; 2242 2243 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 2244 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2245 if (isInt<12>(CVal)) { 2246 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 2247 Subtarget->getXLenVT()); 2248 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 2249 Subtarget->getXLenVT()); 2250 return true; 2251 } 2252 } 2253 2254 return false; 2255 } 2256 2257 // Fold constant addresses. 2258 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 2259 const MVT VT, const RISCVSubtarget *Subtarget, 2260 SDValue Addr, SDValue &Base, SDValue &Offset, 2261 bool IsPrefetch = false) { 2262 if (!isa<ConstantSDNode>(Addr)) 2263 return false; 2264 2265 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 2266 2267 // If the constant is a simm12, we can fold the whole constant and use X0 as 2268 // the base. 
If the constant can be materialized with LUI+simm12, use LUI as 2269 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 2270 int64_t Lo12 = SignExtend64<12>(CVal); 2271 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 2272 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 2273 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2274 return false; 2275 2276 if (Hi) { 2277 int64_t Hi20 = (Hi >> 12) & 0xfffff; 2278 Base = SDValue( 2279 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 2280 CurDAG->getTargetConstant(Hi20, DL, VT)), 2281 0); 2282 } else { 2283 Base = CurDAG->getRegister(RISCV::X0, VT); 2284 } 2285 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2286 return true; 2287 } 2288 2289 // Ask how constant materialization would handle this constant. 2290 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); 2291 2292 // If the last instruction would be an ADDI, we can fold its immediate and 2293 // emit the rest of the sequence as the base. 2294 if (Seq.back().getOpcode() != RISCV::ADDI) 2295 return false; 2296 Lo12 = Seq.back().getImm(); 2297 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2298 return false; 2299 2300 // Drop the last instruction. 2301 Seq.pop_back(); 2302 assert(!Seq.empty() && "Expected more instructions in sequence"); 2303 2304 Base = selectImmSeq(CurDAG, DL, VT, Seq); 2305 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2306 return true; 2307 } 2308 2309 // Is this ADD instruction only used as the base pointer of scalar loads and 2310 // stores? 2311 static bool isWorthFoldingAdd(SDValue Add) { 2312 for (auto *Use : Add->uses()) { 2313 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 2314 Use->getOpcode() != ISD::ATOMIC_LOAD && 2315 Use->getOpcode() != ISD::ATOMIC_STORE) 2316 return false; 2317 EVT VT = cast<MemSDNode>(Use)->getMemoryVT(); 2318 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 2319 VT != MVT::f64) 2320 return false; 2321 // Don't allow stores of the value. It must be used as the address. 2322 if (Use->getOpcode() == ISD::STORE && 2323 cast<StoreSDNode>(Use)->getValue() == Add) 2324 return false; 2325 if (Use->getOpcode() == ISD::ATOMIC_STORE && 2326 cast<AtomicSDNode>(Use)->getVal() == Add) 2327 return false; 2328 } 2329 2330 return true; 2331 } 2332 2333 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, 2334 unsigned MaxShiftAmount, 2335 SDValue &Base, SDValue &Index, 2336 SDValue &Scale) { 2337 EVT VT = Addr.getSimpleValueType(); 2338 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, 2339 SDValue &Shift) { 2340 uint64_t ShiftAmt = 0; 2341 Index = N; 2342 2343 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) { 2344 // Only match shifts by a value in range [0, MaxShiftAmount]. 
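    // e.g. with MaxShiftAmount == 3, (shl x, 2) is unwrapped into Index = x
    // and ShiftAmt = 2, while (shl x, 4) is left as Index = N with a zero
    // scale (illustrative).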
2345 if (N.getConstantOperandVal(1) <= MaxShiftAmount) { 2346 Index = N.getOperand(0); 2347 ShiftAmt = N.getConstantOperandVal(1); 2348 } 2349 } 2350 2351 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); 2352 return ShiftAmt != 0; 2353 }; 2354 2355 if (Addr.getOpcode() == ISD::ADD) { 2356 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 2357 SDValue AddrB = Addr.getOperand(0); 2358 if (AddrB.getOpcode() == ISD::ADD && 2359 UnwrapShl(AddrB.getOperand(0), Index, Scale) && 2360 !isa<ConstantSDNode>(AddrB.getOperand(1)) && 2361 isInt<12>(C1->getSExtValue())) { 2362 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) 2363 SDValue C1Val = 2364 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); 2365 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, 2366 AddrB.getOperand(1), C1Val), 2367 0); 2368 return true; 2369 } 2370 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { 2371 Base = Addr.getOperand(1); 2372 return true; 2373 } else { 2374 UnwrapShl(Addr.getOperand(1), Index, Scale); 2375 Base = Addr.getOperand(0); 2376 return true; 2377 } 2378 } else if (UnwrapShl(Addr, Index, Scale)) { 2379 EVT VT = Addr.getValueType(); 2380 Base = CurDAG->getRegister(RISCV::X0, VT); 2381 return true; 2382 } 2383 2384 return false; 2385 } 2386 2387 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 2388 SDValue &Offset, bool IsINX) { 2389 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2390 return true; 2391 2392 SDLoc DL(Addr); 2393 MVT VT = Addr.getSimpleValueType(); 2394 2395 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 2396 Base = Addr.getOperand(0); 2397 Offset = Addr.getOperand(1); 2398 return true; 2399 } 2400 2401 int64_t RV32ZdinxRange = IsINX ? 4 : 0; 2402 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2403 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2404 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { 2405 Base = Addr.getOperand(0); 2406 if (Base.getOpcode() == RISCVISD::ADD_LO) { 2407 SDValue LoOperand = Base.getOperand(1); 2408 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 2409 // If the Lo in (ADD_LO hi, lo) is a global variable's address 2410 // (its low part, really), then we can rely on the alignment of that 2411 // variable to provide a margin of safety before low part can overflow 2412 // the 12 bits of the load/store offset. Check if CVal falls within 2413 // that margin; if so (low part + CVal) can't overflow. 2414 const DataLayout &DL = CurDAG->getDataLayout(); 2415 Align Alignment = commonAlignment( 2416 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2417 if (CVal == 0 || Alignment > CVal) { 2418 int64_t CombinedOffset = CVal + GA->getOffset(); 2419 Base = Base.getOperand(0); 2420 Offset = CurDAG->getTargetGlobalAddress( 2421 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2422 CombinedOffset, GA->getTargetFlags()); 2423 return true; 2424 } 2425 } 2426 } 2427 2428 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2429 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2430 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 2431 return true; 2432 } 2433 } 2434 2435 // Handle ADD with large immediates. 
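  // Illustrative arithmetic for the AddiPair case below: for (add base, 3000),
  // the offset splits into ADDI base, 2047 plus a remaining load/store offset
  // of 953, both of which fit in simm12.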
2436   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2437     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2438     assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
2439            "simm12 not already handled?");
2440 
2441     // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
2442     // an ADDI for part of the offset and fold the rest into the load/store.
2443     // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
2444     if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
2445       int64_t Adj = CVal < 0 ? -2048 : 2047;
2446       Base = SDValue(
2447           CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
2448                                  CurDAG->getTargetConstant(Adj, DL, VT)),
2449           0);
2450       Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
2451       return true;
2452     }
2453 
2454     // For larger immediates, we might be able to save one instruction from
2455     // constant materialization by folding the Lo12 bits of the immediate into
2456     // the address. We should only do this if the ADD is only used by loads and
2457     // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled
2458     // separately with the full materialized immediate creating extra
2459     // instructions.
2460     if (isWorthFoldingAdd(Addr) &&
2461         selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
2462                            Offset)) {
2463       // Insert an ADD instruction with the materialized Hi52 bits.
2464       Base = SDValue(
2465           CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
2466           0);
2467       return true;
2468     }
2469   }
2470 
2471   if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
2472     return true;
2473 
2474   Base = Addr;
2475   Offset = CurDAG->getTargetConstant(0, DL, VT);
2476   return true;
2477 }
2478 
2479 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of
2480 /// Offset should be all zeros.
2481 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
2482                                                  SDValue &Offset) {
2483   if (SelectAddrFrameIndex(Addr, Base, Offset))
2484     return true;
2485 
2486   SDLoc DL(Addr);
2487   MVT VT = Addr.getSimpleValueType();
2488 
2489   if (CurDAG->isBaseWithConstantOffset(Addr)) {
2490     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2491     if (isInt<12>(CVal)) {
2492       Base = Addr.getOperand(0);
2493 
2494       // Early-out if not a valid offset.
2495       if ((CVal & 0b11111) != 0) {
2496         Base = Addr;
2497         Offset = CurDAG->getTargetConstant(0, DL, VT);
2498         return true;
2499       }
2500 
2501       if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
2502         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
2503       Offset = CurDAG->getTargetConstant(CVal, DL, VT);
2504       return true;
2505     }
2506   }
2507 
2508   // Handle ADD with large immediates.
2509   if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
2510     int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
2511     assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
2512            "simm12 not already handled?");
2513 
2514     // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
2515     // one instruction by folding adjustment (-2048 or 2016) into the address.
2516     if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
2517       int64_t Adj = CVal < 0 ?
-2048 : 2016; 2518 int64_t AdjustedOffset = CVal - Adj; 2519 Base = SDValue(CurDAG->getMachineNode( 2520 RISCV::ADDI, DL, VT, Addr.getOperand(0), 2521 CurDAG->getTargetConstant(AdjustedOffset, DL, VT)), 2522 0); 2523 Offset = CurDAG->getTargetConstant(Adj, DL, VT); 2524 return true; 2525 } 2526 2527 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2528 Offset, true)) { 2529 // Insert an ADD instruction with the materialized Hi52 bits. 2530 Base = SDValue( 2531 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2532 0); 2533 return true; 2534 } 2535 } 2536 2537 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true)) 2538 return true; 2539 2540 Base = Addr; 2541 Offset = CurDAG->getTargetConstant(0, DL, VT); 2542 return true; 2543 } 2544 2545 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2546 SDValue &ShAmt) { 2547 ShAmt = N; 2548 2549 // Peek through zext. 2550 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) 2551 ShAmt = ShAmt.getOperand(0); 2552 2553 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift 2554 // amount. If there is an AND on the shift amount, we can bypass it if it 2555 // doesn't affect any of those bits. 2556 if (ShAmt.getOpcode() == ISD::AND && 2557 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2558 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 2559 2560 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2561 // mask that covers the bits needed to represent all shift amounts. 2562 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2563 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2564 2565 if (ShMask.isSubsetOf(AndMask)) { 2566 ShAmt = ShAmt.getOperand(0); 2567 } else { 2568 // SimplifyDemandedBits may have optimized the mask so try restoring any 2569 // bits that are known zero. 2570 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 2571 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 2572 return true; 2573 ShAmt = ShAmt.getOperand(0); 2574 } 2575 } 2576 2577 if (ShAmt.getOpcode() == ISD::ADD && 2578 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2579 uint64_t Imm = ShAmt.getConstantOperandVal(1); 2580 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 2581 // to avoid the ADD. 2582 if (Imm != 0 && Imm % ShiftWidth == 0) { 2583 ShAmt = ShAmt.getOperand(0); 2584 return true; 2585 } 2586 } else if (ShAmt.getOpcode() == ISD::SUB && 2587 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 2588 uint64_t Imm = ShAmt.getConstantOperandVal(0); 2589 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2590 // generate a NEG instead of a SUB of a constant. 2591 if (Imm != 0 && Imm % ShiftWidth == 0) { 2592 SDLoc DL(ShAmt); 2593 EVT VT = ShAmt.getValueType(); 2594 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2595 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 2596 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2597 ShAmt.getOperand(1)); 2598 ShAmt = SDValue(Neg, 0); 2599 return true; 2600 } 2601 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2602 // to generate a NOT instead of a SUB of a constant. 
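    // e.g. with ShiftWidth == 64, a shift amount of (sub 63, y) satisfies
    // 63 % 64 == 63, so it can be rewritten as (xori y, -1), i.e. ~y, since
    // the shifter only reads the low 6 bits (illustrative).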
2603 if (Imm % ShiftWidth == ShiftWidth - 1) { 2604 SDLoc DL(ShAmt); 2605 EVT VT = ShAmt.getValueType(); 2606 MachineSDNode *Not = 2607 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1), 2608 CurDAG->getTargetConstant(-1, DL, VT)); 2609 ShAmt = SDValue(Not, 0); 2610 return true; 2611 } 2612 } 2613 2614 return true; 2615 } 2616 2617 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 2618 /// check for equality with 0. This function emits instructions that convert the 2619 /// seteq/setne into something that can be compared with 0. 2620 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. 2621 /// ISD::SETNE). 2622 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 2623 SDValue &Val) { 2624 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 2625 "Unexpected condition code!"); 2626 2627 // We're looking for a setcc. 2628 if (N->getOpcode() != ISD::SETCC) 2629 return false; 2630 2631 // Must be an equality comparison. 2632 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2633 if (CCVal != ExpectedCCVal) 2634 return false; 2635 2636 SDValue LHS = N->getOperand(0); 2637 SDValue RHS = N->getOperand(1); 2638 2639 if (!LHS.getValueType().isScalarInteger()) 2640 return false; 2641 2642 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 2643 if (isNullConstant(RHS)) { 2644 Val = LHS; 2645 return true; 2646 } 2647 2648 SDLoc DL(N); 2649 2650 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 2651 int64_t CVal = C->getSExtValue(); 2652 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 2653 // non-zero otherwise. 2654 if (CVal == -2048) { 2655 Val = 2656 SDValue(CurDAG->getMachineNode( 2657 RISCV::XORI, DL, N->getValueType(0), LHS, 2658 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))), 2659 0); 2660 return true; 2661 } 2662 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 2663 // LHS is equal to the RHS and non-zero otherwise. 2664 if (isInt<12>(CVal) || CVal == 2048) { 2665 Val = 2666 SDValue(CurDAG->getMachineNode( 2667 RISCV::ADDI, DL, N->getValueType(0), LHS, 2668 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))), 2669 0); 2670 return true; 2671 } 2672 } 2673 2674 // If nothing else we can XOR the LHS and RHS to produce zero if they are 2675 // equal and a non-zero value if they aren't. 
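  // e.g. (setne a, b) simply hands back (xor a, b) here; the caller is then
  // expected to test the returned value against zero (sketch).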
2676   Val = SDValue(
2677       CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
2678   return true;
2679 }
2680 
2681 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2682   if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2683       cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
2684     Val = N.getOperand(0);
2685     return true;
2686   }
2687 
2688   auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
2689     if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
2690       return N;
2691 
2692     SDValue N0 = N.getOperand(0);
2693     if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
2694         N.getConstantOperandVal(1) == ShiftAmt &&
2695         N0.getConstantOperandVal(1) == ShiftAmt)
2696       return N0.getOperand(0);
2697 
2698     return N;
2699   };
2700 
2701   MVT VT = N.getSimpleValueType();
2702   if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
2703     Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
2704     return true;
2705   }
2706 
2707   return false;
2708 }
2709 
2710 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
2711   if (N.getOpcode() == ISD::AND) {
2712     auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
2713     if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
2714       Val = N.getOperand(0);
2715       return true;
2716     }
2717   }
2718   MVT VT = N.getSimpleValueType();
2719   APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
2720   if (CurDAG->MaskedValueIsZero(N, Mask)) {
2721     Val = N;
2722     return true;
2723   }
2724 
2725   return false;
2726 }
2727 
2728 /// Look for various patterns that can be done with a SHL that can be folded
2729 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
2730 /// SHXADD we are trying to match.
2731 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
2732                                        SDValue &Val) {
2733   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
2734     SDValue N0 = N.getOperand(0);
2735 
2736     bool LeftShift = N0.getOpcode() == ISD::SHL;
2737     if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
2738         isa<ConstantSDNode>(N0.getOperand(1))) {
2739       uint64_t Mask = N.getConstantOperandVal(1);
2740       unsigned C2 = N0.getConstantOperandVal(1);
2741 
2742       unsigned XLen = Subtarget->getXLen();
2743       if (LeftShift)
2744         Mask &= maskTrailingZeros<uint64_t>(C2);
2745       else
2746         Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
2747 
2748       // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
2749       // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
2750       // followed by a SHXADD with c3 for the X amount.
2751       if (isShiftedMask_64(Mask)) {
2752         unsigned Leading = XLen - llvm::bit_width(Mask);
2753         unsigned Trailing = llvm::countr_zero(Mask);
2754         if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
2755           SDLoc DL(N);
2756           EVT VT = N.getValueType();
2757           Val = SDValue(CurDAG->getMachineNode(
2758                             RISCV::SRLI, DL, VT, N0.getOperand(0),
2759                             CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
2760                         0);
2761           return true;
2762         }
2763         // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
2764         // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
2765         // followed by a SHXADD using c3 for the X amount.
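        // Illustrative instance for SH3ADD (ShAmt == 3): with c2 == 2 and
        // c1 == 0x3ffffffffffffff8 (2 leading and 3 trailing zeros), the value
        // (and (srl y, 2), c1) can be formed as (srli y, 5) feeding the
        // sh3add (assumes 64-bit XLEN).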
2766 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 2767 SDLoc DL(N); 2768 EVT VT = N.getValueType(); 2769 Val = SDValue( 2770 CurDAG->getMachineNode( 2771 RISCV::SRLI, DL, VT, N0.getOperand(0), 2772 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 2773 0); 2774 return true; 2775 } 2776 } 2777 } 2778 } 2779 2780 bool LeftShift = N.getOpcode() == ISD::SHL; 2781 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2782 isa<ConstantSDNode>(N.getOperand(1))) { 2783 SDValue N0 = N.getOperand(0); 2784 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2785 isa<ConstantSDNode>(N0.getOperand(1))) { 2786 uint64_t Mask = N0.getConstantOperandVal(1); 2787 if (isShiftedMask_64(Mask)) { 2788 unsigned C1 = N.getConstantOperandVal(1); 2789 unsigned XLen = Subtarget->getXLen(); 2790 unsigned Leading = XLen - llvm::bit_width(Mask); 2791 unsigned Trailing = llvm::countr_zero(Mask); 2792 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2793 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2794 if (LeftShift && Leading == 32 && Trailing > 0 && 2795 (Trailing + C1) == ShAmt) { 2796 SDLoc DL(N); 2797 EVT VT = N.getValueType(); 2798 Val = SDValue(CurDAG->getMachineNode( 2799 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2800 CurDAG->getTargetConstant(Trailing, DL, VT)), 2801 0); 2802 return true; 2803 } 2804 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2805 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2806 if (!LeftShift && Leading == 32 && Trailing > C1 && 2807 (Trailing - C1) == ShAmt) { 2808 SDLoc DL(N); 2809 EVT VT = N.getValueType(); 2810 Val = SDValue(CurDAG->getMachineNode( 2811 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2812 CurDAG->getTargetConstant(Trailing, DL, VT)), 2813 0); 2814 return true; 2815 } 2816 } 2817 } 2818 } 2819 2820 return false; 2821 } 2822 2823 /// Look for various patterns that can be done with a SHL that can be folded 2824 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 2825 /// SHXADD_UW we are trying to match. 2826 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 2827 SDValue &Val) { 2828 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 2829 N.hasOneUse()) { 2830 SDValue N0 = N.getOperand(0); 2831 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 2832 N0.hasOneUse()) { 2833 uint64_t Mask = N.getConstantOperandVal(1); 2834 unsigned C2 = N0.getConstantOperandVal(1); 2835 2836 Mask &= maskTrailingZeros<uint64_t>(C2); 2837 2838 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 2839 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 2840 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 
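      // Illustrative instance for SH2ADD_UW (ShAmt == 2): with c2 == 4 and
      // c1 == 0x3fffffff0, (and (shl y, 4), c1) becomes (slli y, 2) feeding
      // the sh2add.uw, which re-applies the remaining shift and the 32-bit
      // zero extension (assumes 64-bit XLEN).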
2841 if (isShiftedMask_64(Mask)) { 2842 unsigned Leading = llvm::countl_zero(Mask); 2843 unsigned Trailing = llvm::countr_zero(Mask); 2844 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 2845 SDLoc DL(N); 2846 EVT VT = N.getValueType(); 2847 Val = SDValue(CurDAG->getMachineNode( 2848 RISCV::SLLI, DL, VT, N0.getOperand(0), 2849 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 2850 0); 2851 return true; 2852 } 2853 } 2854 } 2855 } 2856 2857 return false; 2858 } 2859 2860 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, 2861 unsigned Bits, 2862 const TargetInstrInfo *TII) { 2863 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode()); 2864 2865 if (!MCOpcode) 2866 return false; 2867 2868 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); 2869 const uint64_t TSFlags = MCID.TSFlags; 2870 if (!RISCVII::hasSEWOp(TSFlags)) 2871 return false; 2872 assert(RISCVII::hasVLOp(TSFlags)); 2873 2874 bool HasGlueOp = User->getGluedNode() != nullptr; 2875 unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1; 2876 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; 2877 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); 2878 unsigned VLIdx = 2879 User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; 2880 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); 2881 2882 if (UserOpNo == VLIdx) 2883 return false; 2884 2885 auto NumDemandedBits = 2886 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); 2887 return NumDemandedBits && Bits >= *NumDemandedBits; 2888 } 2889 2890 // Return true if all users of this SDNode* only consume the lower \p Bits. 2891 // This can be used to form W instructions for add/sub/mul/shl even when the 2892 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 2893 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 2894 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 2895 // the add/sub/mul/shl to become non-W instructions. By checking the users we 2896 // may be able to use a W instruction and CSE with the other instruction if 2897 // this has happened. We could try to detect that the CSE opportunity exists 2898 // before doing this, but that would be more complicated. 2899 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, 2900 const unsigned Depth) const { 2901 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2902 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2903 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || 2904 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || 2905 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2906 isa<ConstantSDNode>(Node) || Depth != 0) && 2907 "Unexpected opcode"); 2908 2909 if (Depth >= SelectionDAG::MaxRecursionDepth) 2910 return false; 2911 2912 // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked 2913 // the VT. Ensure the type is scalar to avoid wasting time on vectors. 2914 if (Depth == 0 && !Node->getValueType(0).isScalarInteger()) 2915 return false; 2916 2917 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2918 SDNode *User = *UI; 2919 // Users of this node should have already been instruction selected 2920 if (!User->isMachineOpcode()) 2921 return false; 2922 2923 // TODO: Add more opcodes? 
2924 switch (User->getMachineOpcode()) { 2925 default: 2926 if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII)) 2927 break; 2928 return false; 2929 case RISCV::ADDW: 2930 case RISCV::ADDIW: 2931 case RISCV::SUBW: 2932 case RISCV::MULW: 2933 case RISCV::SLLW: 2934 case RISCV::SLLIW: 2935 case RISCV::SRAW: 2936 case RISCV::SRAIW: 2937 case RISCV::SRLW: 2938 case RISCV::SRLIW: 2939 case RISCV::DIVW: 2940 case RISCV::DIVUW: 2941 case RISCV::REMW: 2942 case RISCV::REMUW: 2943 case RISCV::ROLW: 2944 case RISCV::RORW: 2945 case RISCV::RORIW: 2946 case RISCV::CLZW: 2947 case RISCV::CTZW: 2948 case RISCV::CPOPW: 2949 case RISCV::SLLI_UW: 2950 case RISCV::FMV_W_X: 2951 case RISCV::FCVT_H_W: 2952 case RISCV::FCVT_H_WU: 2953 case RISCV::FCVT_S_W: 2954 case RISCV::FCVT_S_WU: 2955 case RISCV::FCVT_D_W: 2956 case RISCV::FCVT_D_WU: 2957 case RISCV::TH_REVW: 2958 case RISCV::TH_SRRIW: 2959 if (Bits < 32) 2960 return false; 2961 break; 2962 case RISCV::SLL: 2963 case RISCV::SRA: 2964 case RISCV::SRL: 2965 case RISCV::ROL: 2966 case RISCV::ROR: 2967 case RISCV::BSET: 2968 case RISCV::BCLR: 2969 case RISCV::BINV: 2970 // Shift amount operands only use log2(Xlen) bits. 2971 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen())) 2972 return false; 2973 break; 2974 case RISCV::SLLI: 2975 // SLLI only uses the lower (XLen - ShAmt) bits. 2976 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1)) 2977 return false; 2978 break; 2979 case RISCV::ANDI: 2980 if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1))) 2981 break; 2982 goto RecCheck; 2983 case RISCV::ORI: { 2984 uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue(); 2985 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm)) 2986 break; 2987 [[fallthrough]]; 2988 } 2989 case RISCV::AND: 2990 case RISCV::OR: 2991 case RISCV::XOR: 2992 case RISCV::XORI: 2993 case RISCV::ANDN: 2994 case RISCV::ORN: 2995 case RISCV::XNOR: 2996 case RISCV::SH1ADD: 2997 case RISCV::SH2ADD: 2998 case RISCV::SH3ADD: 2999 RecCheck: 3000 if (hasAllNBitUsers(User, Bits, Depth + 1)) 3001 break; 3002 return false; 3003 case RISCV::SRLI: { 3004 unsigned ShAmt = User->getConstantOperandVal(1); 3005 // If we are shifting right by less than Bits, and users don't demand any 3006 // bits that were shifted into [Bits-1:0], then we can consider this as an 3007 // N-Bit user. 3008 if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1)) 3009 break; 3010 return false; 3011 } 3012 case RISCV::SEXT_B: 3013 case RISCV::PACKH: 3014 if (Bits < 8) 3015 return false; 3016 break; 3017 case RISCV::SEXT_H: 3018 case RISCV::FMV_H_X: 3019 case RISCV::ZEXT_H_RV32: 3020 case RISCV::ZEXT_H_RV64: 3021 case RISCV::PACKW: 3022 if (Bits < 16) 3023 return false; 3024 break; 3025 case RISCV::PACK: 3026 if (Bits < (Subtarget->getXLen() / 2)) 3027 return false; 3028 break; 3029 case RISCV::ADD_UW: 3030 case RISCV::SH1ADD_UW: 3031 case RISCV::SH2ADD_UW: 3032 case RISCV::SH3ADD_UW: 3033 // The first operand to add.uw/shXadd.uw is implicitly zero extended from 3034 // 32 bits. 
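      // e.g. for add.uw rd, rs1, rs2 only the low 32 bits of rs1 are consumed,
      // so a value feeding rs1 (operand 0) qualifies as a 32-bit user, while
      // rs2 is used in full (sketch).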
3035       if (UI.getOperandNo() != 0 || Bits < 32)
3036         return false;
3037       break;
3038     case RISCV::SB:
3039       if (UI.getOperandNo() != 0 || Bits < 8)
3040         return false;
3041       break;
3042     case RISCV::SH:
3043       if (UI.getOperandNo() != 0 || Bits < 16)
3044         return false;
3045       break;
3046     case RISCV::SW:
3047       if (UI.getOperandNo() != 0 || Bits < 32)
3048         return false;
3049       break;
3050     }
3051   }
3052 
3053   return true;
3054 }
3055 
3056 // Select a constant that can be represented as (sign_extend(imm5) << imm2).
3057 bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
3058                                         SDValue &Shl2) {
3059   if (auto *C = dyn_cast<ConstantSDNode>(N)) {
3060     int64_t Offset = C->getSExtValue();
3061     int64_t Shift;
3062     for (Shift = 0; Shift < 4; Shift++)
3063       if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
3064         break;
3065 
3066     // Constant cannot be encoded.
3067     if (Shift == 4)
3068       return false;
3069 
3070     EVT Ty = N->getValueType(0);
3071     Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
3072     Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
3073     return true;
3074   }
3075 
3076   return false;
3077 }
3078 
3079 // Select VL as a 5 bit immediate or a value that will become a register. This
3080 // allows us to choose between VSETIVLI or VSETVLI later.
3081 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
3082   auto *C = dyn_cast<ConstantSDNode>(N);
3083   if (C && isUInt<5>(C->getZExtValue())) {
3084     VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
3085                                    N->getValueType(0));
3086   } else if (C && C->isAllOnes()) {
3087     // Treat all ones as VLMax.
3088     VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
3089                                    N->getValueType(0));
3090   } else if (isa<RegisterSDNode>(N) &&
3091              cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
3092     // All our VL operands use an operand that allows GPRNoX0 or an immediate
3093     // as the register class. Convert X0 to a special immediate to pass the
3094     // MachineVerifier. This is recognized specially by the vsetvli insertion
3095     // pass.
3096 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 3097 N->getValueType(0)); 3098 } else { 3099 VL = N; 3100 } 3101 3102 return true; 3103 } 3104 3105 static SDValue findVSplat(SDValue N) { 3106 if (N.getOpcode() == ISD::INSERT_SUBVECTOR) { 3107 if (!N.getOperand(0).isUndef()) 3108 return SDValue(); 3109 N = N.getOperand(1); 3110 } 3111 SDValue Splat = N; 3112 if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL && 3113 Splat.getOpcode() != RISCVISD::VMV_S_X_VL) || 3114 !Splat.getOperand(0).isUndef()) 3115 return SDValue(); 3116 assert(Splat.getNumOperands() == 3 && "Unexpected number of operands"); 3117 return Splat; 3118 } 3119 3120 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 3121 SDValue Splat = findVSplat(N); 3122 if (!Splat) 3123 return false; 3124 3125 SplatVal = Splat.getOperand(1); 3126 return true; 3127 } 3128 3129 static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal, 3130 SelectionDAG &DAG, 3131 const RISCVSubtarget &Subtarget, 3132 std::function<bool(int64_t)> ValidateImm) { 3133 SDValue Splat = findVSplat(N); 3134 if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1))) 3135 return false; 3136 3137 const unsigned SplatEltSize = Splat.getScalarValueSizeInBits(); 3138 assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() && 3139 "Unexpected splat operand type"); 3140 3141 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 3142 // type is wider than the resulting vector element type: an implicit 3143 // truncation first takes place. Therefore, perform a manual 3144 // truncation/sign-extension in order to ignore any truncated bits and catch 3145 // any zero-extended immediate. 3146 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 3147 // sign-extending to (XLenVT -1). 3148 APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize); 3149 3150 int64_t SplatImm = SplatConst.getSExtValue(); 3151 3152 if (!ValidateImm(SplatImm)) 3153 return false; 3154 3155 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT()); 3156 return true; 3157 } 3158 3159 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 3160 return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget, 3161 [](int64_t Imm) { return isInt<5>(Imm); }); 3162 } 3163 3164 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 3165 return selectVSplatImmHelper( 3166 N, SplatVal, *CurDAG, *Subtarget, 3167 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 3168 } 3169 3170 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 3171 SDValue &SplatVal) { 3172 return selectVSplatImmHelper( 3173 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 3174 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 3175 }); 3176 } 3177 3178 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, 3179 SDValue &SplatVal) { 3180 return selectVSplatImmHelper( 3181 N, SplatVal, *CurDAG, *Subtarget, 3182 [Bits](int64_t Imm) { return isUIntN(Bits, Imm); }); 3183 } 3184 3185 bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { 3186 // Truncates are custom lowered during legalization. 3187 auto IsTrunc = [this](SDValue N) { 3188 if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) 3189 return false; 3190 SDValue VL; 3191 selectVLOp(N->getOperand(2), VL); 3192 // Any vmset_vl is ok, since any bits past VL are undefined and we can 3193 // assume they are set. 
    return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
           isa<ConstantSDNode>(VL) &&
           cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
  };

  // We can have multiple nested truncates, so unravel them all if needed.
  while (N->getOpcode() == ISD::SIGN_EXTEND ||
         N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
    if (!N.hasOneUse() ||
        N.getValueType().getSizeInBits().getKnownMinValue() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  // Even if this FPImm requires an additional FNEG (i.e. the second element of
  // the returned pair is true) we still prefer FLI + FNEG over immediate
  // materialization as the latter might generate a longer instruction
  // sequence.
  if (static_cast<const RISCVTargetLowering *>(TLI)
          ->getLegalZfaFPImm(APF, VT)
          .first >= 0)
    return false;

  MVT XLenVT = Subtarget->getXLenVT();
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
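    // Illustrative effect of this case (assembly sketch; registers arbitrary):
    //   slli  a0, a1, 3
    //   addiw a2, a0, 0      ==>   slliw a2, a1, 3
    // The rewrite is only valid for shift amounts below 32, hence the uimm5
    // check below.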
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    if (N0.getValueType() == MVT::i32)
      break;

    // The result is already sign extended; just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(MaskOp) ||
      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = GlueOp.getNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask
  // came from an extract_subvector or insert_subvector.
  if (MaskSetter->isMachineOpcode() &&
      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
    MaskSetter = MaskSetter->getOperand(0);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskSetter->isMachineOpcode() &&
         IsVMSet(MaskSetter.getMachineOpcode());
}

// Return true if we can be sure that the mask of N is an all-ones mask.
static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
  return usesAllOnesMask(N->getOperand(MaskOpIdx),
                         N->getOperand(N->getNumOperands() - 1));
}

static bool isImplicitDef(SDValue V) {
  return V.isMachineOpcode() &&
         V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
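//
// Illustrative shape of the rewrite (operand lists simplified; the exact
// layout varies per pseudo):
//   %m = PseudoVMSET_M_B8 %vl, %sew
//   $v0 = CopyToReg %m                      ; glued to the masked pseudo
//   %r = PseudoVADD_VV_M1_MASK %passthru, %a, %b, $v0, %vl, %sew, %policy
// becomes
//   %r = PseudoVADD_VV_M1 %passthru, %a, %b, %vl, %sew, %policy
// with the passthru operand dropped when the unmasked pseudo has no policy
// operand.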
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
             RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
         "Masked and unmasked pseudos are inconsistent");
  const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
  assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
#endif

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if !UseTUPseudo.
  for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask and the glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}

static bool IsVMv(SDNode *N) {
  return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}

// Try to fold away VMERGE_VVM instructions. We handle these cases:
// - A masked TU VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TU instruction. The VMERGE_VVM's merge operand must be the same
//   as its false operand.
// - A masked TA VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TA instruction.
// - An unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds
//   to a masked TU instruction. Both instructions must have the same merge
//   operand, and the VMERGE_VVM's merge operand must be the same as its false
//   operand.
// Note: The VMERGE_VVM forms above (TA and TU) refer to the policy implied,
// not the pseudo name.
// That is, a TA VMERGE_VVM can be either the _TU pseudo form with an
// IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo form.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue; instead we'll construct an
    // all-ones mask below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }

  if (!Info)
    return false;

  // When the mask is not all-ones, this transformation is illegal for some
  // operations whose results are affected by the mask, like viota.m.
  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
    return false;

  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    // The vmerge instruction must be TU.
    // FIXME: This could be relaxed, but we need to handle the policy for the
    // resulting op correctly.
    if (isImplicitDef(Merge))
      return false;
    SDValue MergeOpTrue = True->getOperand(0);
    // Both the vmerge instruction and the True instruction must have the same
    // merge operand.
    if (False != MergeOpTrue)
      return false;
  }

  if (IsMasked) {
    assert(HasTiedDest && "Expected tied dest");
    // The vmerge instruction must be TU.
    if (isImplicitDef(Merge))
      return false;
    // The vmerge instruction must have an all-ones mask since we're going to
    // keep the mask from the True instruction.
    // FIXME: Support mask agnostic True instructions, which would have an
    // undef merge operand.
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
  }

  // Skip if True has side effects.
  // TODO: Support vleff and vlsegff.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may exist either before the glued operands or in the
  // last position.
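  //
  // Operand layout assumed by the index arithmetic below (optional operands
  // in brackets):
  //   True: (merge, sources..., [mask], [rm], vl, sew, [policy], [chain],
  //          [glue])
  // TrueVLIndex below steps back from the end over the optional trailing
  // operands to find vl.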
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If we end up using the vmerge's mask but the vmerge is actually a
  // vmv.v.v, create an all-ones mask to use.
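  // (Sketch of the branch below: build a PseudoVMSET for this node's LMUL,
  // copy it into V0, and thread the copy's glue to the new masked node.)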
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with mask have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally,
  // then elements past VL that were previously in the vmerge's body will have
  // moved to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For an unmasked "VOp" with a rounding mode operand, i.e. interfaces like
  // (..., rm, vl) or (..., rm, vl, policy), the masked version is
  // (..., vm, rm, vl, policy). See the rounding mode pseudo nodes in
  // RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // The result node should take the chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  MachineSDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  if (!cast<MachineSDNode>(True)->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. the chain and VL.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
  }
  return MadeChange;
}

/// If our passthru is an implicit_def, use noreg instead. This sidesteps
/// issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    const unsigned Opc = N->getMachineOpcode();
    if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
        !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
        !isImplicitDef(N->getOperand(0)))
      continue;

    SmallVector<SDValue> Ops;
    Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
    for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
      SDValue Op = N->getOperand(I);
      Ops.push_back(Op);
    }

    MachineSDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
    Result->setFlags(N->getFlags());
    CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
    ReplaceUses(N, Result);
    MadeChange = true;
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOptLevel OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}

char RISCVDAGToDAGISel::ID = 0;

INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)