//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"

namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                               N->getOperand(0), VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      SDLoc DL(N);

      // Create temporary stack for each expanding node.
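      // Lo is stored at offset 0 and Hi at offset 4, so the slot holds one
      // i64 element; the stride-0 vlse below (stride register X0) then
      // broadcasts that element to every lane of the result.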
      SDValue StackSlot =
          CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
      int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);
    MadeChange |= doPeepholeMaskedRVV(N);
  }

  CurDAG->setRoot(Dummy.getValue());

  MadeChange |= doPeepholeMergeVVMFold();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (const RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
    SDNode *Result = nullptr;
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return SrcReg;
}

static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());

  // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
  // worst an LUI+ADDIW. This will require an extra register, but avoids a
  // constant pool.
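  // For example, Imm = 0x0000123400001234 has LoVal == HiVal == 0x1234, so it
  // can be built as X = 0x1234 followed by X + (X << 32), whenever that is
  // strictly shorter than the generic sequence (the SeqLo.size() + 2 check
  // below).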
  if (Seq.size() > 3) {
    int64_t LoVal = SignExtend64<32>(Imm);
    int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
    if (LoVal == HiVal) {
      RISCVMatInt::InstSeq SeqLo =
          RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
      if ((SeqLo.size() + 2) < Seq.size()) {
        SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);

        SDValue SLLI = SDValue(
            CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
                                   CurDAG->getTargetConstant(32, DL, VT)),
            0);
        return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
                       0);
      }
    }
  }

  // Otherwise, use the original sequence.
  return selectImmSeq(CurDAG, DL, VT, Seq);
}

static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  static const unsigned M1TupleRegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};
  static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                                RISCV::VRN3M2RegClassID,
                                                RISCV::VRN4M2RegClassID};

  assert(Regs.size() >= 2 && Regs.size() <= 8);

  unsigned RegClassID;
  unsigned SubReg0;
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm1_0;
    RegClassID = M1TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_2:
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm2_0;
    RegClassID = M2TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_4:
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm4_0;
    RegClassID = RISCV::VRN2M4RegClassID;
    break;
  }

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}
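// Appends the operands common to the RVV load/store pseudos to Operands, in
// this order: base pointer, optional stride or index, optional mask (copied
// to V0 via a glued CopyToReg), VL, SEW, a policy operand for loads, the
// chain, and finally the glue when a mask was copied.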
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // At the IR layer, all the masked load intrinsics have policy operands;
  // none of the others do. All have passthru operands. For our pseudos,
  // all loads have policy operands.
  if (IsLoad) {
    uint64_t Policy = RISCVII::MASK_AGNOSTIC;
    if (IsMasked)
      Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(Merge);
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
                            static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}
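// As selectVLSEG, but for the fault-only-first forms, which additionally
// return the updated VL (result NF) ahead of the chain (result NF + 1).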
void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(MaskedOff);
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
  Operands.push_back(MaskedOff);
  CurOp += NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  unsigned IntNo = Node->getConstantOperandVal(0);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
  unsigned Offset = (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ true);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
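  // An all-ones AVL is the canonical request for VLMAX; both it and the
  // vsetvlimax intrinsic are selected by passing X0 as the AVL register
  // (PseudoVSETVLIX0). Otherwise, small constant AVLs can use vsetivli.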
  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
                                                 XLenVT, VLImm, VTypeIOp));
        return;
      }
    }
  }

  ReplaceNode(Node,
              CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR; AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR: BinOpc = RISCV::ORI; break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;
  SDNode *BinOp =
      CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
                             CurDAG->getTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
  // Only supported with XTHeadBb at the moment.
  if (!Subtarget->hasVendorXTHeadBb())
    return false;

  auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
  if (!N1C)
    return false;

  SDValue N0 = Node->getOperand(0);
  if (!N0.hasOneUse())
    return false;

  auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
                             MVT VT) {
    return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
                                  CurDAG->getTargetConstant(Msb, DL, VT),
                                  CurDAG->getTargetConstant(Lsb, DL, VT));
  };

  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);
  const unsigned RightShAmt = N1C->getZExtValue();

  // Transform (sra (shl X, C1) C2) with C1 < C2
  //        -> (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SHL) {
    auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (!N01C)
      return false;

    const unsigned LeftShAmt = N01C->getZExtValue();
    // Make sure that this is a bitfield extraction (i.e., the shift-right
    // amount can not be less than the left-shift).
    if (LeftShAmt > RightShAmt)
      return false;

    const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
    const unsigned Msb = MsbPlusOne - 1;
    const unsigned Lsb = RightShAmt - LeftShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  // Transform (sra (sext_inreg X, _), C) ->
  //           (TH.EXT X, msb, lsb)
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();

    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize == 32)
      return false;

    const unsigned Msb = ExtSize - 1;
    const unsigned Lsb = RightShAmt;

    SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
    ReplaceNode(Node, TH_EXT);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
  // Target does not support indexed loads.
  if (!Subtarget->hasVendorXTHeadMemIdx())
    return false;

  LoadSDNode *Ld = cast<LoadSDNode>(Node);
  ISD::MemIndexedMode AM = Ld->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
  if (!C)
    return false;

  EVT LoadVT = Ld->getMemoryVT();
  bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
  bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
  int64_t Offset = C->getSExtValue();

  // Convert decrements to increments by a negative quantity.
  if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
    Offset = -Offset;

  // The constants that can be encoded in the THeadMemIdx instructions
  // are of the form (sign_extend(imm5) << imm2).
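  // For example, an offset of 48 encodes as 6 << 3 (imm5 = 6, imm2 = 3),
  // whereas 128 would need a shift of 4 or more and is rejected below.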
  int64_t Shift;
  for (Shift = 0; Shift < 4; Shift++)
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
      break;

  // Constant cannot be encoded.
  if (Shift == 4)
    return false;

  bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
  unsigned Opcode;
  if (LoadVT == MVT::i8 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
  else if (LoadVT == MVT::i8 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
  else if (LoadVT == MVT::i16 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
  else if (LoadVT == MVT::i16 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
  else if (LoadVT == MVT::i32 && IsPre)
    Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
  else if (LoadVT == MVT::i32 && IsPost)
    Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
  else if (LoadVT == MVT::i64 && IsPre)
    Opcode = RISCV::TH_LDIB;
  else if (LoadVT == MVT::i64 && IsPost)
    Opcode = RISCV::TH_LDIA;
  else
    return false;

  EVT Ty = Ld->getOffset().getValueType();
  SDValue Ops[] = {Ld->getBasePtr(),
                   CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
                   CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
                   Ld->getChain()};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
                                       Ld->getValueType(1), MVT::Other, Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});

  ReplaceNode(Node, New);

  return true;
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();

  switch (Opcode) {
  case ISD::Constant: {
    assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32 bits are not used, try to convert this into a simm32 by
    // sign extending bit 31.
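    // For example, Imm = 0xFFFFFFFF with only W users becomes -1, which
    // materializes as a single ADDI instead of a longer sequence.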
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
    return;
  }
  case ISD::ConstantFP: {
    const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
    int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
        APF, VT);
    if (FPImm >= 0) {
      unsigned Opc;
      switch (VT.SimpleTy) {
      default:
        llvm_unreachable("Unexpected size");
      case MVT::f16:
        Opc = RISCV::FLI_H;
        break;
      case MVT::f32:
        Opc = RISCV::FLI_S;
        break;
      case MVT::f64:
        Opc = RISCV::FLI_D;
        break;
      }

      SDNode *Res = CurDAG->getMachineNode(
          Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
      ReplaceNode(Node, Res);
      return;
    }

    bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
    SDValue Imm;
    // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
    // create an integer immediate.
    if (APF.isPosZero() || NegZeroF64)
      Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
    else
      Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                      *Subtarget);

    unsigned Opc;
    switch (VT.SimpleTy) {
    default:
      llvm_unreachable("Unexpected size");
    case MVT::f16:
      Opc =
          Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
      break;
    case MVT::f32:
      Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
      break;
    case MVT::f64:
      // For RV32, we can't move from a GPR, we need to convert instead. This
      // should only happen for +0.0 and -0.0.
      assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
      bool HasZdinx = Subtarget->hasStdExtZdinx();
      if (Subtarget->is64Bit())
        Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
      else
        Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
      break;
    }

    SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);

    // For f64 -0.0, we need to insert a fneg.d idiom.
    if (NegZeroF64)
      Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
                                   SDValue(Res, 0));

    ReplaceNode(Node, Res);
    return;
  }
  case RISCVISD::SplitF64: {
    if (!Subtarget->hasStdExtZfa())
      break;
    assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
           "Unexpected subtarget");

    // With Zfa, lower to fmv.x.w and fmvh.x.d.
    if (!SDValue(Node, 0).use_empty()) {
      SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
    }
    if (!SDValue(Node, 1).use_empty()) {
      SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
                                          Node->getOperand(0));
      ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
    }

    CurDAG->RemoveDeadNode(Node);
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (TrailingZeros > 0 && LeadingZeros == 32) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = llvm::countr_zero(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, XLen-C3), (XLen-C3) + C)
    // where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
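    // For example, on RV64 (srl (and X, 0xFF), 4) becomes
    // (srli (slli X, 56), 60).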
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = llvm::countr_one(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
    if (TrailingOnes == 32) {
      SDNode *SRLI = CurDAG->getMachineNode(
          Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLI);
      return;
    }

    // Only do the remaining transforms if the AND has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
    if (HasBitTest && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI = CurDAG->getMachineNode(
          Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
          N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }

    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    if (trySignedBitfieldExtract(Node))
      return;

    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, XLen-16), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, XLen-8), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR:
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    uint64_t C1 = N1C->getZExtValue();
    const bool isC1Mask = isMask_64(C1);
    const bool isC1ANDI = isInt<12>(C1);

    SDValue N0 = Node->getOperand(0);

    auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
                                          SDValue X, unsigned Msb,
                                          unsigned Lsb) {
      if (!Subtarget->hasVendorXTHeadBb())
        return false;

      SDNode *TH_EXTU = CurDAG->getMachineNode(
          RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
          CurDAG->getTargetConstant(Lsb, DL, VT));
      ReplaceNode(Node, TH_EXTU);
      return true;
    };

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      // Clear irrelevant bits in the mask.
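      // The low C2 bits of (shl x, C2) and the high C2 bits of (srl x, C2)
      // are known to be zero, so those bits of C1 do not affect the result;
      // normalize C1 so the mask checks below see a canonical value.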
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32)
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
      if (!LeftShift && isC1Mask) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // Try to use an unsigned bitfield extract (e.g., th.extu) if
          // available.
          // Transform (and (srl x, C2), C1)
          //        -> (<bfextract> x, msb, lsb)
          //
          // Make sure to keep this below the SRLIW cases, as we always want to
          // prefer the more common instruction.
          const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
          const unsigned Lsb = C2;
          if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
            return;

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also Skip if we can use bexti or th.tst.
          Skip |= HasBitTest && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a
      // mask shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
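          // slli.uw zero-extends the low 32 bits before shifting, so a single
          // instruction covers both the shift and the mask when exactly the
          // low 32 bits of x survive.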
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // (srli (slli x, c2+c3), c3)
          if (OneUseOrZExtW && !IsCANDI) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
      // shifted mask with c2 leading zeros and c3 trailing zeros.
      if (!LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
            !IsCANDI) {
          unsigned SrliOpc = RISCV::SRLI;
          // If the input is zexti32 we should use SRLIW.
          if (X.getOpcode() == ISD::AND &&
              isa<ConstantSDNode>(X.getOperand(1)) &&
              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
            SrliOpc = RISCV::SRLIW;
            X = X.getOperand(0);
          }
          SDNode *SRLI = CurDAG->getMachineNode(
              SrliOpc, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
            OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }

      // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
      // shifted mask with no leading zeros and c3 trailing zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = llvm::countr_zero(C1);
        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }
    }

    // If C1 masks off the upper bits only (but can't be formed as an
    // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
    // available.
    // Transform (and x, C1)
    //        -> (<bfextract> x, msb, lsb)
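    // For example, (and x, 0x3FFFF) cannot be an ANDI but is
    // (th.extu x, 17, 0) when XTHeadBb is available.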
    if (isC1Mask && !isC1ANDI) {
      const unsigned Msb = llvm::bit_width(C1) - 1;
      if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
        return;
    }

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full
    // product fits in XLen bits. We can shift X left by the number of leading
    // zeros in C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the
    // final product has XLen trailing zeros, putting it in the output of
    // MULHU. This can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
    // multiple users or the constant is a simm12. This prevents inserting a
    // shift and still having uses of the AND/ZEXT. Shifting a simm12 will
    // likely make it more costly to materialize. Otherwise, using a SLLI
    // might allow it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
    // With XTHeadBb, we can use TH.EXTU.
    IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;
    // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
    // the constant is a simm32.
    bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
    // With XTHeadBb, we can use TH.EXTU.
    IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
    if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
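    // For example, on RV64 (mul (and X, 0xFFFFF), 0x1234) becomes
    // (mulhu (slli X, 44), 0x1234 << 20): the full product of the shifted
    // operands is the desired product shifted left by exactly XLen bits, so
    // MULHU yields it directly and the 0xFFFFF mask is never materialized.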
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::LOAD: {
    if (tryIndexedLoad(Node))
      return;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
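      // (There is no vmsge{u}.vi; the tablegen patterns handle these by using
      // vmsgt{u}.vi with CVal - 1, whose simm5 range is [-16, 15], hence the
      // [-15, 16] check below.)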
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    break;
      CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
      CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
      CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
      CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
      CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
      CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
      CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
      if (IsCmpUnsignedZero) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      SmallVector<SDValue, 8> Operands;
      Operands.push_back(Node->getOperand(CurOp++));

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand at the IR level, while the pseudos have both policy and
      // passthru operands. The passthru operand is needed to track the
      // "tail undefined" state, and the policy is there just for
      // consistency - it will always be "don't care" for the unmasked form.
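      // Of these, riscv_vlm (vlm.v, the mask-register load) is the only form
      // without an IR-level passthru; riscv_vle/riscv_vlse and their _mask
      // variants carry the passthru as operand 2.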
1718 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1719 unsigned CurOp = 2; 1720 SmallVector<SDValue, 8> Operands; 1721 if (HasPassthruOperand) 1722 Operands.push_back(Node->getOperand(CurOp++)); 1723 else { 1724 // We eagerly lower to implicit_def (instead of undef), as we 1725 // otherwise fail to select nodes such as: nxv1i1 = undef 1726 SDNode *Passthru = 1727 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 1728 Operands.push_back(SDValue(Passthru, 0)); 1729 } 1730 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1731 Operands, /*IsLoad=*/true); 1732 1733 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1734 const RISCV::VLEPseudo *P = 1735 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 1736 static_cast<unsigned>(LMUL)); 1737 MachineSDNode *Load = 1738 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1739 1740 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1741 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1742 1743 ReplaceNode(Node, Load); 1744 return; 1745 } 1746 case Intrinsic::riscv_vleff: 1747 case Intrinsic::riscv_vleff_mask: { 1748 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1749 1750 MVT VT = Node->getSimpleValueType(0); 1751 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1752 1753 unsigned CurOp = 2; 1754 SmallVector<SDValue, 7> Operands; 1755 Operands.push_back(Node->getOperand(CurOp++)); 1756 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1757 /*IsStridedOrIndexed*/ false, Operands, 1758 /*IsLoad=*/true); 1759 1760 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1761 const RISCV::VLEPseudo *P = 1762 RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, 1763 Log2SEW, static_cast<unsigned>(LMUL)); 1764 MachineSDNode *Load = CurDAG->getMachineNode( 1765 P->Pseudo, DL, Node->getVTList(), Operands); 1766 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1767 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1768 1769 ReplaceNode(Node, Load); 1770 return; 1771 } 1772 } 1773 break; 1774 } 1775 case ISD::INTRINSIC_VOID: { 1776 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1777 switch (IntNo) { 1778 case Intrinsic::riscv_vsseg2: 1779 case Intrinsic::riscv_vsseg3: 1780 case Intrinsic::riscv_vsseg4: 1781 case Intrinsic::riscv_vsseg5: 1782 case Intrinsic::riscv_vsseg6: 1783 case Intrinsic::riscv_vsseg7: 1784 case Intrinsic::riscv_vsseg8: { 1785 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1786 return; 1787 } 1788 case Intrinsic::riscv_vsseg2_mask: 1789 case Intrinsic::riscv_vsseg3_mask: 1790 case Intrinsic::riscv_vsseg4_mask: 1791 case Intrinsic::riscv_vsseg5_mask: 1792 case Intrinsic::riscv_vsseg6_mask: 1793 case Intrinsic::riscv_vsseg7_mask: 1794 case Intrinsic::riscv_vsseg8_mask: { 1795 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1796 return; 1797 } 1798 case Intrinsic::riscv_vssseg2: 1799 case Intrinsic::riscv_vssseg3: 1800 case Intrinsic::riscv_vssseg4: 1801 case Intrinsic::riscv_vssseg5: 1802 case Intrinsic::riscv_vssseg6: 1803 case Intrinsic::riscv_vssseg7: 1804 case Intrinsic::riscv_vssseg8: { 1805 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1806 return; 1807 } 1808 case Intrinsic::riscv_vssseg2_mask: 1809 case Intrinsic::riscv_vssseg3_mask: 1810 case Intrinsic::riscv_vssseg4_mask: 1811 case Intrinsic::riscv_vssseg5_mask: 1812 case Intrinsic::riscv_vssseg6_mask: 1813 case Intrinsic::riscv_vssseg7_mask: 1814 case Intrinsic::riscv_vssseg8_mask: { 1815 
selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1816 return; 1817 } 1818 case Intrinsic::riscv_vsoxseg2: 1819 case Intrinsic::riscv_vsoxseg3: 1820 case Intrinsic::riscv_vsoxseg4: 1821 case Intrinsic::riscv_vsoxseg5: 1822 case Intrinsic::riscv_vsoxseg6: 1823 case Intrinsic::riscv_vsoxseg7: 1824 case Intrinsic::riscv_vsoxseg8: 1825 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1826 return; 1827 case Intrinsic::riscv_vsuxseg2: 1828 case Intrinsic::riscv_vsuxseg3: 1829 case Intrinsic::riscv_vsuxseg4: 1830 case Intrinsic::riscv_vsuxseg5: 1831 case Intrinsic::riscv_vsuxseg6: 1832 case Intrinsic::riscv_vsuxseg7: 1833 case Intrinsic::riscv_vsuxseg8: 1834 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1835 return; 1836 case Intrinsic::riscv_vsoxseg2_mask: 1837 case Intrinsic::riscv_vsoxseg3_mask: 1838 case Intrinsic::riscv_vsoxseg4_mask: 1839 case Intrinsic::riscv_vsoxseg5_mask: 1840 case Intrinsic::riscv_vsoxseg6_mask: 1841 case Intrinsic::riscv_vsoxseg7_mask: 1842 case Intrinsic::riscv_vsoxseg8_mask: 1843 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1844 return; 1845 case Intrinsic::riscv_vsuxseg2_mask: 1846 case Intrinsic::riscv_vsuxseg3_mask: 1847 case Intrinsic::riscv_vsuxseg4_mask: 1848 case Intrinsic::riscv_vsuxseg5_mask: 1849 case Intrinsic::riscv_vsuxseg6_mask: 1850 case Intrinsic::riscv_vsuxseg7_mask: 1851 case Intrinsic::riscv_vsuxseg8_mask: 1852 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1853 return; 1854 case Intrinsic::riscv_vsoxei: 1855 case Intrinsic::riscv_vsoxei_mask: 1856 case Intrinsic::riscv_vsuxei: 1857 case Intrinsic::riscv_vsuxei_mask: { 1858 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1859 IntNo == Intrinsic::riscv_vsuxei_mask; 1860 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1861 IntNo == Intrinsic::riscv_vsoxei_mask; 1862 1863 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1864 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1865 1866 unsigned CurOp = 2; 1867 SmallVector<SDValue, 8> Operands; 1868 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
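      // The index vector is picked up by addVectorLoadStoreOperands below,
      // which also reports its type; the index EEW and LMUL are derived from
      // IndexVT rather than from the stored data's VT.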
1869 1870 MVT IndexVT; 1871 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1872 /*IsStridedOrIndexed*/ true, Operands, 1873 /*IsLoad=*/false, &IndexVT); 1874 1875 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1876 "Element count mismatch"); 1877 1878 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1879 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1880 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1881 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1882 report_fatal_error("The V extension does not support EEW=64 for index " 1883 "values when XLEN=32"); 1884 } 1885 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1886 IsMasked, IsOrdered, IndexLog2EEW, 1887 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1888 MachineSDNode *Store = 1889 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1890 1891 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1892 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1893 1894 ReplaceNode(Node, Store); 1895 return; 1896 } 1897 case Intrinsic::riscv_vsm: 1898 case Intrinsic::riscv_vse: 1899 case Intrinsic::riscv_vse_mask: 1900 case Intrinsic::riscv_vsse: 1901 case Intrinsic::riscv_vsse_mask: { 1902 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1903 IntNo == Intrinsic::riscv_vsse_mask; 1904 bool IsStrided = 1905 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1906 1907 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1908 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1909 1910 unsigned CurOp = 2; 1911 SmallVector<SDValue, 8> Operands; 1912 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1913 1914 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1915 Operands); 1916 1917 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1918 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1919 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1920 MachineSDNode *Store = 1921 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1922 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1923 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1924 1925 ReplaceNode(Node, Store); 1926 return; 1927 } 1928 } 1929 break; 1930 } 1931 case ISD::BITCAST: { 1932 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1933 // Just drop bitcasts between vectors if both are fixed or both are 1934 // scalable. 1935 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1936 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1937 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1938 CurDAG->RemoveDeadNode(Node); 1939 return; 1940 } 1941 break; 1942 } 1943 case ISD::INSERT_SUBVECTOR: { 1944 SDValue V = Node->getOperand(0); 1945 SDValue SubV = Node->getOperand(1); 1946 SDLoc DL(SubV); 1947 auto Idx = Node->getConstantOperandVal(2); 1948 MVT SubVecVT = SubV.getSimpleValueType(); 1949 1950 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1951 MVT SubVecContainerVT = SubVecVT; 1952 // Establish the correct scalable-vector types for any fixed-length type. 
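    // Working in container types lets decomposeSubvectorInsertExtractToSubRegs
    // below treat fixed-length subvectors exactly like scalable ones when
    // computing the subregister index.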
1953 if (SubVecVT.isFixedLengthVector()) 1954 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1955 if (VT.isFixedLengthVector()) 1956 VT = TLI.getContainerForFixedLengthVector(VT); 1957 1958 const auto *TRI = Subtarget->getRegisterInfo(); 1959 unsigned SubRegIdx; 1960 std::tie(SubRegIdx, Idx) = 1961 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1962 VT, SubVecContainerVT, Idx, TRI); 1963 1964 // If the Idx hasn't been completely eliminated then this is a subvector 1965 // insert which doesn't naturally align to a vector register. These must 1966 // be handled using instructions to manipulate the vector registers. 1967 if (Idx != 0) 1968 break; 1969 1970 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1971 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1972 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1973 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1974 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1975 assert((!IsSubVecPartReg || V.isUndef()) && 1976 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1977 "the subvector is smaller than a full-sized register"); 1978 1979 // If we haven't set a SubRegIdx, then we must be going between 1980 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1981 if (SubRegIdx == RISCV::NoSubRegister) { 1982 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1983 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1984 InRegClassID && 1985 "Unexpected subvector extraction"); 1986 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1987 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1988 DL, VT, SubV, RC); 1989 ReplaceNode(Node, NewNode); 1990 return; 1991 } 1992 1993 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1994 ReplaceNode(Node, Insert.getNode()); 1995 return; 1996 } 1997 case ISD::EXTRACT_SUBVECTOR: { 1998 SDValue V = Node->getOperand(0); 1999 auto Idx = Node->getConstantOperandVal(1); 2000 MVT InVT = V.getSimpleValueType(); 2001 SDLoc DL(V); 2002 2003 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2004 MVT SubVecContainerVT = VT; 2005 // Establish the correct scalable-vector types for any fixed-length type. 2006 if (VT.isFixedLengthVector()) 2007 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 2008 if (InVT.isFixedLengthVector()) 2009 InVT = TLI.getContainerForFixedLengthVector(InVT); 2010 2011 const auto *TRI = Subtarget->getRegisterInfo(); 2012 unsigned SubRegIdx; 2013 std::tie(SubRegIdx, Idx) = 2014 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2015 InVT, SubVecContainerVT, Idx, TRI); 2016 2017 // If the Idx hasn't been completely eliminated then this is a subvector 2018 // extract which doesn't naturally align to a vector register. These must 2019 // be handled using instructions to manipulate the vector registers. 2020 if (Idx != 0) 2021 break; 2022 2023 // If we haven't set a SubRegIdx, then we must be going between 2024 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
2025 if (SubRegIdx == RISCV::NoSubRegister) { 2026 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 2027 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2028 InRegClassID && 2029 "Unexpected subvector extraction"); 2030 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2031 SDNode *NewNode = 2032 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 2033 ReplaceNode(Node, NewNode); 2034 return; 2035 } 2036 2037 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 2038 ReplaceNode(Node, Extract.getNode()); 2039 return; 2040 } 2041 case RISCVISD::VMV_S_X_VL: 2042 case RISCVISD::VFMV_S_F_VL: 2043 case RISCVISD::VMV_V_X_VL: 2044 case RISCVISD::VFMV_V_F_VL: { 2045 // Try to match splat of a scalar load to a strided load with stride of x0. 2046 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 2047 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 2048 if (!Node->getOperand(0).isUndef()) 2049 break; 2050 SDValue Src = Node->getOperand(1); 2051 auto *Ld = dyn_cast<LoadSDNode>(Src); 2052 // Can't fold load update node because the second 2053 // output is used so that load update node can't be removed. 2054 if (!Ld || Ld->isIndexed()) 2055 break; 2056 EVT MemVT = Ld->getMemoryVT(); 2057 // The memory VT should be the same size as the element type. 2058 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 2059 break; 2060 if (!IsProfitableToFold(Src, Node, Node) || 2061 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 2062 break; 2063 2064 SDValue VL; 2065 if (IsScalarMove) { 2066 // We could deal with more VL if we update the VSETVLI insert pass to 2067 // avoid introducing more VSETVLI. 2068 if (!isOneConstant(Node->getOperand(2))) 2069 break; 2070 selectVLOp(Node->getOperand(2), VL); 2071 } else 2072 selectVLOp(Node->getOperand(2), VL); 2073 2074 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2075 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 2076 2077 // If VL=1, then we don't need to do a strided load and can just do a 2078 // regular load. 2079 bool IsStrided = !isOneConstant(VL); 2080 2081 // Only do a strided load if we have optimized zero-stride vector load. 2082 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) 2083 break; 2084 2085 SmallVector<SDValue> Operands = 2086 {CurDAG->getUNDEF(VT), Ld->getBasePtr()}; 2087 if (IsStrided) 2088 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); 2089 uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; 2090 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 2091 Operands.append({VL, SEW, PolicyOp, Ld->getChain()}); 2092 2093 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2094 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 2095 /*IsMasked*/ false, IsStrided, /*FF*/ false, 2096 Log2SEW, static_cast<unsigned>(LMUL)); 2097 MachineSDNode *Load = 2098 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); 2099 // Update the chain. 2100 ReplaceUses(Src.getValue(1), SDValue(Load, 1)); 2101 // Record the mem-refs 2102 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 2103 // Replace the splat with the vlse. 
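    // The scalar load's chain result was rerouted to the new load above, so
    // the original load becomes dead once the splat node is replaced here.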
2104 ReplaceNode(Node, Load); 2105 return; 2106 } 2107 case ISD::PREFETCH: 2108 unsigned Locality = Node->getConstantOperandVal(3); 2109 if (Locality > 2) 2110 break; 2111 2112 if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) { 2113 MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); 2114 MMO->setFlags(MachineMemOperand::MONonTemporal); 2115 2116 int NontemporalLevel = 0; 2117 switch (Locality) { 2118 case 0: 2119 NontemporalLevel = 3; // NTL.ALL 2120 break; 2121 case 1: 2122 NontemporalLevel = 1; // NTL.PALL 2123 break; 2124 case 2: 2125 NontemporalLevel = 0; // NTL.P1 2126 break; 2127 default: 2128 llvm_unreachable("unexpected locality value."); 2129 } 2130 2131 if (NontemporalLevel & 0b1) 2132 MMO->setFlags(MONontemporalBit0); 2133 if (NontemporalLevel & 0b10) 2134 MMO->setFlags(MONontemporalBit1); 2135 } 2136 break; 2137 } 2138 2139 // Select the default instruction. 2140 SelectCode(Node); 2141 } 2142 2143 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 2144 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 2145 // Always produce a register and immediate operand, as expected by 2146 // RISCVAsmPrinter::PrintAsmMemoryOperand. 2147 switch (ConstraintID) { 2148 case InlineAsm::Constraint_o: 2149 case InlineAsm::Constraint_m: { 2150 SDValue Op0, Op1; 2151 bool Found = SelectAddrRegImm(Op, Op0, Op1); 2152 assert(Found && "SelectAddrRegImm should always succeed"); 2153 (void)Found; 2154 OutOps.push_back(Op0); 2155 OutOps.push_back(Op1); 2156 return false; 2157 } 2158 case InlineAsm::Constraint_A: 2159 OutOps.push_back(Op); 2160 OutOps.push_back( 2161 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); 2162 return false; 2163 default: 2164 report_fatal_error("Unexpected asm memory constraint " + 2165 InlineAsm::getMemConstraintName(ConstraintID)); 2166 } 2167 2168 return true; 2169 } 2170 2171 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 2172 SDValue &Offset) { 2173 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 2174 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 2175 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 2176 return true; 2177 } 2178 2179 return false; 2180 } 2181 2182 // Select a frame index and an optional immediate offset from an ADD or OR. 2183 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 2184 SDValue &Offset) { 2185 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2186 return true; 2187 2188 if (!CurDAG->isBaseWithConstantOffset(Addr)) 2189 return false; 2190 2191 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 2192 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2193 if (isInt<12>(CVal)) { 2194 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 2195 Subtarget->getXLenVT()); 2196 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 2197 Subtarget->getXLenVT()); 2198 return true; 2199 } 2200 } 2201 2202 return false; 2203 } 2204 2205 // Fold constant addresses. 2206 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 2207 const MVT VT, const RISCVSubtarget *Subtarget, 2208 SDValue Addr, SDValue &Base, SDValue &Offset) { 2209 if (!isa<ConstantSDNode>(Addr)) 2210 return false; 2211 2212 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 2213 2214 // If the constant is a simm12, we can fold the whole constant and use X0 as 2215 // the base. If the constant can be materialized with LUI+simm12, use LUI as 2216 // the base. 
We can't use generateInstSeq because it favors LUI+ADDIW. 2217 int64_t Lo12 = SignExtend64<12>(CVal); 2218 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 2219 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 2220 if (Hi) { 2221 int64_t Hi20 = (Hi >> 12) & 0xfffff; 2222 Base = SDValue( 2223 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 2224 CurDAG->getTargetConstant(Hi20, DL, VT)), 2225 0); 2226 } else { 2227 Base = CurDAG->getRegister(RISCV::X0, VT); 2228 } 2229 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2230 return true; 2231 } 2232 2233 // Ask how constant materialization would handle this constant. 2234 RISCVMatInt::InstSeq Seq = 2235 RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits()); 2236 2237 // If the last instruction would be an ADDI, we can fold its immediate and 2238 // emit the rest of the sequence as the base. 2239 if (Seq.back().getOpcode() != RISCV::ADDI) 2240 return false; 2241 Lo12 = Seq.back().getImm(); 2242 2243 // Drop the last instruction. 2244 Seq.pop_back(); 2245 assert(!Seq.empty() && "Expected more instructions in sequence"); 2246 2247 Base = selectImmSeq(CurDAG, DL, VT, Seq); 2248 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 2249 return true; 2250 } 2251 2252 // Is this ADD instruction only used as the base pointer of scalar loads and 2253 // stores? 2254 static bool isWorthFoldingAdd(SDValue Add) { 2255 for (auto *Use : Add->uses()) { 2256 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 2257 Use->getOpcode() != ISD::ATOMIC_LOAD && 2258 Use->getOpcode() != ISD::ATOMIC_STORE) 2259 return false; 2260 EVT VT = cast<MemSDNode>(Use)->getMemoryVT(); 2261 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 2262 VT != MVT::f64) 2263 return false; 2264 // Don't allow stores of the value. It must be used as the address. 2265 if (Use->getOpcode() == ISD::STORE && 2266 cast<StoreSDNode>(Use)->getValue() == Add) 2267 return false; 2268 if (Use->getOpcode() == ISD::ATOMIC_STORE && 2269 cast<AtomicSDNode>(Use)->getVal() == Add) 2270 return false; 2271 } 2272 2273 return true; 2274 } 2275 2276 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, 2277 unsigned MaxShiftAmount, 2278 SDValue &Base, SDValue &Index, 2279 SDValue &Scale) { 2280 EVT VT = Addr.getSimpleValueType(); 2281 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, 2282 SDValue &Shift) { 2283 uint64_t ShiftAmt = 0; 2284 Index = N; 2285 2286 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) { 2287 // Only match shifts by a value in range [0, MaxShiftAmount]. 
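      // A plain, unshifted index leaves ShiftAmt == 0, so the lambda returns
      // false to signal that no scale was folded.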
2288 if (N.getConstantOperandVal(1) <= MaxShiftAmount) { 2289 Index = N.getOperand(0); 2290 ShiftAmt = N.getConstantOperandVal(1); 2291 } 2292 } 2293 2294 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); 2295 return ShiftAmt != 0; 2296 }; 2297 2298 if (Addr.getOpcode() == ISD::ADD) { 2299 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 2300 SDValue AddrB = Addr.getOperand(0); 2301 if (AddrB.getOpcode() == ISD::ADD && 2302 UnwrapShl(AddrB.getOperand(0), Index, Scale) && 2303 !isa<ConstantSDNode>(AddrB.getOperand(1)) && 2304 isInt<12>(C1->getSExtValue())) { 2305 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) 2306 SDValue C1Val = 2307 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); 2308 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, 2309 AddrB.getOperand(1), C1Val), 2310 0); 2311 return true; 2312 } 2313 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { 2314 Base = Addr.getOperand(1); 2315 return true; 2316 } else { 2317 UnwrapShl(Addr.getOperand(1), Index, Scale); 2318 Base = Addr.getOperand(0); 2319 return true; 2320 } 2321 } else if (UnwrapShl(Addr, Index, Scale)) { 2322 EVT VT = Addr.getValueType(); 2323 Base = CurDAG->getRegister(RISCV::X0, VT); 2324 return true; 2325 } 2326 2327 return false; 2328 } 2329 2330 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 2331 SDValue &Offset, bool IsINX) { 2332 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2333 return true; 2334 2335 SDLoc DL(Addr); 2336 MVT VT = Addr.getSimpleValueType(); 2337 2338 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 2339 Base = Addr.getOperand(0); 2340 Offset = Addr.getOperand(1); 2341 return true; 2342 } 2343 2344 int64_t RV32ZdinxRange = IsINX ? 4 : 0; 2345 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2346 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2347 if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) { 2348 Base = Addr.getOperand(0); 2349 if (Base.getOpcode() == RISCVISD::ADD_LO) { 2350 SDValue LoOperand = Base.getOperand(1); 2351 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 2352 // If the Lo in (ADD_LO hi, lo) is a global variable's address 2353 // (its low part, really), then we can rely on the alignment of that 2354 // variable to provide a margin of safety before low part can overflow 2355 // the 12 bits of the load/store offset. Check if CVal falls within 2356 // that margin; if so (low part + CVal) can't overflow. 2357 const DataLayout &DL = CurDAG->getDataLayout(); 2358 Align Alignment = commonAlignment( 2359 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2360 if (CVal == 0 || Alignment > CVal) { 2361 int64_t CombinedOffset = CVal + GA->getOffset(); 2362 Base = Base.getOperand(0); 2363 Offset = CurDAG->getTargetGlobalAddress( 2364 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2365 CombinedOffset, GA->getTargetFlags()); 2366 return true; 2367 } 2368 } 2369 } 2370 2371 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2372 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2373 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 2374 return true; 2375 } 2376 } 2377 2378 // Handle ADD with large immediates. 
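  // For example, an offset of 3000 becomes an ADDI of 2047 plus a load/store
  // offset of 953, so both immediates fit in simm12.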
2379 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2380 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2381 assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) && 2382 "simm12 not already handled?"); 2383 2384 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use 2385 // an ADDI for part of the offset and fold the rest into the load/store. 2386 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 2387 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { 2388 int64_t Adj = CVal < 0 ? -2048 : 2047; 2389 Base = SDValue( 2390 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 2391 CurDAG->getTargetConstant(Adj, DL, VT)), 2392 0); 2393 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); 2394 return true; 2395 } 2396 2397 // For larger immediates, we might be able to save one instruction from 2398 // constant materialization by folding the Lo12 bits of the immediate into 2399 // the address. We should only do this if the ADD is only used by loads and 2400 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled 2401 // separately with the full materialized immediate creating extra 2402 // instructions. 2403 if (isWorthFoldingAdd(Addr) && 2404 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2405 Offset)) { 2406 // Insert an ADD instruction with the materialized Hi52 bits. 2407 Base = SDValue( 2408 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2409 0); 2410 return true; 2411 } 2412 } 2413 2414 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) 2415 return true; 2416 2417 Base = Addr; 2418 Offset = CurDAG->getTargetConstant(0, DL, VT); 2419 return true; 2420 } 2421 2422 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2423 SDValue &ShAmt) { 2424 ShAmt = N; 2425 2426 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift 2427 // amount. If there is an AND on the shift amount, we can bypass it if it 2428 // doesn't affect any of those bits. 2429 if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2430 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 2431 2432 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2433 // mask that covers the bits needed to represent all shift amounts. 2434 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2435 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2436 2437 if (ShMask.isSubsetOf(AndMask)) { 2438 ShAmt = ShAmt.getOperand(0); 2439 } else { 2440 // SimplifyDemandedBits may have optimized the mask so try restoring any 2441 // bits that are known zero. 2442 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 2443 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 2444 return true; 2445 ShAmt = ShAmt.getOperand(0); 2446 } 2447 } 2448 2449 if (ShAmt.getOpcode() == ISD::ADD && 2450 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2451 uint64_t Imm = ShAmt.getConstantOperandVal(1); 2452 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 2453 // to avoid the ADD. 
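  // e.g. with ShiftWidth == 64, shifting by (x + 64) is the same as shifting
  // by x because the shift instruction only reads the low 6 bits.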
2454 if (Imm != 0 && Imm % ShiftWidth == 0) { 2455 ShAmt = ShAmt.getOperand(0); 2456 return true; 2457 } 2458 } else if (ShAmt.getOpcode() == ISD::SUB && 2459 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 2460 uint64_t Imm = ShAmt.getConstantOperandVal(0); 2461 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2462 // generate a NEG instead of a SUB of a constant. 2463 if (Imm != 0 && Imm % ShiftWidth == 0) { 2464 SDLoc DL(ShAmt); 2465 EVT VT = ShAmt.getValueType(); 2466 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2467 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 2468 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2469 ShAmt.getOperand(1)); 2470 ShAmt = SDValue(Neg, 0); 2471 return true; 2472 } 2473 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2474 // to generate a NOT instead of a SUB of a constant. 2475 if (Imm % ShiftWidth == ShiftWidth - 1) { 2476 SDLoc DL(ShAmt); 2477 EVT VT = ShAmt.getValueType(); 2478 MachineSDNode *Not = 2479 CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1), 2480 CurDAG->getTargetConstant(-1, DL, VT)); 2481 ShAmt = SDValue(Not, 0); 2482 return true; 2483 } 2484 } 2485 2486 return true; 2487 } 2488 2489 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 2490 /// check for equality with 0. This function emits instructions that convert the 2491 /// seteq/setne into something that can be compared with 0. 2492 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. 2493 /// ISD::SETNE). 2494 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 2495 SDValue &Val) { 2496 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 2497 "Unexpected condition code!"); 2498 2499 // We're looking for a setcc. 2500 if (N->getOpcode() != ISD::SETCC) 2501 return false; 2502 2503 // Must be an equality comparison. 2504 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 2505 if (CCVal != ExpectedCCVal) 2506 return false; 2507 2508 SDValue LHS = N->getOperand(0); 2509 SDValue RHS = N->getOperand(1); 2510 2511 if (!LHS.getValueType().isScalarInteger()) 2512 return false; 2513 2514 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 2515 if (isNullConstant(RHS)) { 2516 Val = LHS; 2517 return true; 2518 } 2519 2520 SDLoc DL(N); 2521 2522 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 2523 int64_t CVal = C->getSExtValue(); 2524 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 2525 // non-zero otherwise. 2526 if (CVal == -2048) { 2527 Val = 2528 SDValue(CurDAG->getMachineNode( 2529 RISCV::XORI, DL, N->getValueType(0), LHS, 2530 CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))), 2531 0); 2532 return true; 2533 } 2534 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 2535 // LHS is equal to the RHS and non-zero otherwise. 2536 if (isInt<12>(CVal) || CVal == 2048) { 2537 Val = 2538 SDValue(CurDAG->getMachineNode( 2539 RISCV::ADDI, DL, N->getValueType(0), LHS, 2540 CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))), 2541 0); 2542 return true; 2543 } 2544 } 2545 2546 // If nothing else we can XOR the LHS and RHS to produce zero if they are 2547 // equal and a non-zero value if they aren't. 
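  // The value returned here is expected to be compared against zero by the
  // caller (e.g. with seqz/snez or a branch on zero), so producing zero
  // exactly when LHS == RHS is sufficient.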
  Val = SDValue(
      CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
  return true;
}

bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
    Val = N.getOperand(0);
    return true;
  }

  auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
    if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
      return N;

    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N.getConstantOperandVal(1) == ShiftAmt &&
        N0.getConstantOperandVal(1) == ShiftAmt)
      return N0.getOperand(0);

    return N;
  };

  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
    Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
      // followed by a SHXADD with c3 for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = llvm::countr_zero(Mask);
        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
                        0);
          return true;
        }
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
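        // e.g. for sh2add (ShAmt == 2): (and (srl y, 4), 0x0ffffffffffffffc)
        // has 4 leading and 2 trailing zero bits, so Val becomes srli y, 6
        // and the sh2add re-applies the 2-bit scale.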
2638 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 2639 SDLoc DL(N); 2640 EVT VT = N.getValueType(); 2641 Val = SDValue( 2642 CurDAG->getMachineNode( 2643 RISCV::SRLI, DL, VT, N0.getOperand(0), 2644 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 2645 0); 2646 return true; 2647 } 2648 } 2649 } 2650 } 2651 2652 bool LeftShift = N.getOpcode() == ISD::SHL; 2653 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2654 isa<ConstantSDNode>(N.getOperand(1))) { 2655 SDValue N0 = N.getOperand(0); 2656 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2657 isa<ConstantSDNode>(N0.getOperand(1))) { 2658 uint64_t Mask = N0.getConstantOperandVal(1); 2659 if (isShiftedMask_64(Mask)) { 2660 unsigned C1 = N.getConstantOperandVal(1); 2661 unsigned XLen = Subtarget->getXLen(); 2662 unsigned Leading = XLen - llvm::bit_width(Mask); 2663 unsigned Trailing = llvm::countr_zero(Mask); 2664 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2665 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2666 if (LeftShift && Leading == 32 && Trailing > 0 && 2667 (Trailing + C1) == ShAmt) { 2668 SDLoc DL(N); 2669 EVT VT = N.getValueType(); 2670 Val = SDValue(CurDAG->getMachineNode( 2671 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2672 CurDAG->getTargetConstant(Trailing, DL, VT)), 2673 0); 2674 return true; 2675 } 2676 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2677 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2678 if (!LeftShift && Leading == 32 && Trailing > C1 && 2679 (Trailing - C1) == ShAmt) { 2680 SDLoc DL(N); 2681 EVT VT = N.getValueType(); 2682 Val = SDValue(CurDAG->getMachineNode( 2683 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2684 CurDAG->getTargetConstant(Trailing, DL, VT)), 2685 0); 2686 return true; 2687 } 2688 } 2689 } 2690 } 2691 2692 return false; 2693 } 2694 2695 /// Look for various patterns that can be done with a SHL that can be folded 2696 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 2697 /// SHXADD_UW we are trying to match. 2698 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 2699 SDValue &Val) { 2700 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 2701 N.hasOneUse()) { 2702 SDValue N0 = N.getOperand(0); 2703 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 2704 N0.hasOneUse()) { 2705 uint64_t Mask = N.getConstantOperandVal(1); 2706 unsigned C2 = N0.getConstantOperandVal(1); 2707 2708 Mask &= maskTrailingZeros<uint64_t>(C2); 2709 2710 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 2711 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 2712 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 2713 if (isShiftedMask_64(Mask)) { 2714 unsigned Leading = llvm::countl_zero(Mask); 2715 unsigned Trailing = llvm::countr_zero(Mask); 2716 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 2717 SDLoc DL(N); 2718 EVT VT = N.getValueType(); 2719 Val = SDValue(CurDAG->getMachineNode( 2720 RISCV::SLLI, DL, VT, N0.getOperand(0), 2721 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 2722 0); 2723 return true; 2724 } 2725 } 2726 } 2727 } 2728 2729 return false; 2730 } 2731 2732 // Return true if all users of this SDNode* only consume the lower \p Bits. 2733 // This can be used to form W instructions for add/sub/mul/shl even when the 2734 // root isn't a sext_inreg. 
This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 2735 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 2736 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 2737 // the add/sub/mul/shl to become non-W instructions. By checking the users we 2738 // may be able to use a W instruction and CSE with the other instruction if 2739 // this has happened. We could try to detect that the CSE opportunity exists 2740 // before doing this, but that would be more complicated. 2741 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, 2742 const unsigned Depth) const { 2743 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2744 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2745 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || 2746 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || 2747 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2748 isa<ConstantSDNode>(Node) || Depth != 0) && 2749 "Unexpected opcode"); 2750 2751 if (Depth >= SelectionDAG::MaxRecursionDepth) 2752 return false; 2753 2754 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2755 SDNode *User = *UI; 2756 // Users of this node should have already been instruction selected 2757 if (!User->isMachineOpcode()) 2758 return false; 2759 2760 // TODO: Add more opcodes? 2761 switch (User->getMachineOpcode()) { 2762 default: 2763 return false; 2764 case RISCV::ADDW: 2765 case RISCV::ADDIW: 2766 case RISCV::SUBW: 2767 case RISCV::MULW: 2768 case RISCV::SLLW: 2769 case RISCV::SLLIW: 2770 case RISCV::SRAW: 2771 case RISCV::SRAIW: 2772 case RISCV::SRLW: 2773 case RISCV::SRLIW: 2774 case RISCV::DIVW: 2775 case RISCV::DIVUW: 2776 case RISCV::REMW: 2777 case RISCV::REMUW: 2778 case RISCV::ROLW: 2779 case RISCV::RORW: 2780 case RISCV::RORIW: 2781 case RISCV::CLZW: 2782 case RISCV::CTZW: 2783 case RISCV::CPOPW: 2784 case RISCV::SLLI_UW: 2785 case RISCV::FMV_W_X: 2786 case RISCV::FCVT_H_W: 2787 case RISCV::FCVT_H_WU: 2788 case RISCV::FCVT_S_W: 2789 case RISCV::FCVT_S_WU: 2790 case RISCV::FCVT_D_W: 2791 case RISCV::FCVT_D_WU: 2792 case RISCV::TH_REVW: 2793 case RISCV::TH_SRRIW: 2794 if (Bits < 32) 2795 return false; 2796 break; 2797 case RISCV::SLL: 2798 case RISCV::SRA: 2799 case RISCV::SRL: 2800 case RISCV::ROL: 2801 case RISCV::ROR: 2802 case RISCV::BSET: 2803 case RISCV::BCLR: 2804 case RISCV::BINV: 2805 // Shift amount operands only use log2(Xlen) bits. 2806 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen())) 2807 return false; 2808 break; 2809 case RISCV::SLLI: 2810 // SLLI only uses the lower (XLen - ShAmt) bits. 
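    // e.g. on RV64, slli rd, rs, 40 only reads bits [23:0] of rs, so it
    // qualifies as long as Bits >= 24 (XLen - ShAmt).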
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits < 16)
        return false;
      break;
    case RISCV::PACK:
      if (Bits < (Subtarget->getXLen() / 2))
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t Offset = C->getSExtValue();
    int64_t Shift;
    for (Shift = 0; Shift < 4; Shift++)
      if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
        break;

    // Constant cannot be encoded.
    if (Shift == 4)
      return false;

    EVT Ty = N->getValueType(0);
    Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
    Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
    return true;
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
2923 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2924 N->getValueType(0)); 2925 } else if (isa<RegisterSDNode>(N) && 2926 cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { 2927 // All our VL operands use an operand that allows GPRNoX0 or an immediate 2928 // as the register class. Convert X0 to a special immediate to pass the 2929 // MachineVerifier. This is recognized specially by the vsetvli insertion 2930 // pass. 2931 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2932 N->getValueType(0)); 2933 } else { 2934 VL = N; 2935 } 2936 2937 return true; 2938 } 2939 2940 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 2941 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 2942 return false; 2943 assert(N.getNumOperands() == 3 && "Unexpected number of operands"); 2944 SplatVal = N.getOperand(1); 2945 return true; 2946 } 2947 2948 using ValidateFn = bool (*)(int64_t); 2949 2950 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 2951 SelectionDAG &DAG, 2952 const RISCVSubtarget &Subtarget, 2953 ValidateFn ValidateImm) { 2954 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2955 !isa<ConstantSDNode>(N.getOperand(1))) 2956 return false; 2957 assert(N.getNumOperands() == 3 && "Unexpected number of operands"); 2958 2959 int64_t SplatImm = 2960 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2961 2962 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 2963 // type is wider than the resulting vector element type: an implicit 2964 // truncation first takes place. Therefore, perform a manual 2965 // truncation/sign-extension in order to ignore any truncated bits and catch 2966 // any zero-extended immediate. 2967 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 2968 // sign-extending to (XLenVT -1). 
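  // Conversely, a splat of (i8 131) reaches here as (XLenVT 131); after
  // sign-extending the low 8 bits it becomes -125, which correctly fails the
  // simm5 check.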
2969 MVT XLenVT = Subtarget.getXLenVT(); 2970 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 2971 "Unexpected splat operand type"); 2972 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 2973 if (EltVT.bitsLT(XLenVT)) 2974 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 2975 2976 if (!ValidateImm(SplatImm)) 2977 return false; 2978 2979 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 2980 return true; 2981 } 2982 2983 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2984 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2985 [](int64_t Imm) { return isInt<5>(Imm); }); 2986 } 2987 2988 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2989 return selectVSplatSimmHelper( 2990 N, SplatVal, *CurDAG, *Subtarget, 2991 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2992 } 2993 2994 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2995 SDValue &SplatVal) { 2996 return selectVSplatSimmHelper( 2997 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2998 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2999 }); 3000 } 3001 3002 bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, 3003 SDValue &SplatVal) { 3004 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 3005 !isa<ConstantSDNode>(N.getOperand(1))) 3006 return false; 3007 3008 int64_t SplatImm = 3009 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 3010 3011 if (!isUIntN(Bits, SplatImm)) 3012 return false; 3013 3014 SplatVal = 3015 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 3016 3017 return true; 3018 } 3019 3020 bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) { 3021 if (N->getOpcode() == ISD::SIGN_EXTEND || 3022 N->getOpcode() == ISD::ZERO_EXTEND) { 3023 if (!N.hasOneUse()) 3024 return false; 3025 N = N->getOperand(0); 3026 } 3027 return selectVSplat(N, SplatVal); 3028 } 3029 3030 bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) { 3031 ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode()); 3032 if (!CFP) 3033 return false; 3034 const APFloat &APF = CFP->getValueAPF(); 3035 // td can handle +0.0 already. 3036 if (APF.isPosZero()) 3037 return false; 3038 3039 MVT VT = CFP->getSimpleValueType(0); 3040 3041 if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF, 3042 VT) >= 0) 3043 return false; 3044 3045 MVT XLenVT = Subtarget->getXLenVT(); 3046 if (VT == MVT::f64 && !Subtarget->is64Bit()) { 3047 assert(APF.isNegZero() && "Unexpected constant."); 3048 return false; 3049 } 3050 SDLoc DL(N); 3051 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), 3052 *Subtarget); 3053 return true; 3054 } 3055 3056 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 3057 SDValue &Imm) { 3058 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 3059 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 3060 3061 if (!isInt<5>(ImmVal)) 3062 return false; 3063 3064 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 3065 return true; 3066 } 3067 3068 return false; 3069 } 3070 3071 // Try to remove sext.w if the input is a W instruction or can be made into 3072 // a W instruction cheaply. 3073 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 3074 // Look for the sext.w pattern, addiw rd, rs1, 0. 
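  // e.g. addw a0, a0, a1 followed by sext.w a0, a0 collapses to just the
  // addw, since addw already sign-extends its 32-bit result.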
3075 if (N->getMachineOpcode() != RISCV::ADDIW || 3076 !isNullConstant(N->getOperand(1))) 3077 return false; 3078 3079 SDValue N0 = N->getOperand(0); 3080 if (!N0.isMachineOpcode()) 3081 return false; 3082 3083 switch (N0.getMachineOpcode()) { 3084 default: 3085 break; 3086 case RISCV::ADD: 3087 case RISCV::ADDI: 3088 case RISCV::SUB: 3089 case RISCV::MUL: 3090 case RISCV::SLLI: { 3091 // Convert sext.w+add/sub/mul to their W instructions. This will create 3092 // a new independent instruction. This improves latency. 3093 unsigned Opc; 3094 switch (N0.getMachineOpcode()) { 3095 default: 3096 llvm_unreachable("Unexpected opcode!"); 3097 case RISCV::ADD: Opc = RISCV::ADDW; break; 3098 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 3099 case RISCV::SUB: Opc = RISCV::SUBW; break; 3100 case RISCV::MUL: Opc = RISCV::MULW; break; 3101 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 3102 } 3103 3104 SDValue N00 = N0.getOperand(0); 3105 SDValue N01 = N0.getOperand(1); 3106 3107 // Shift amount needs to be uimm5. 3108 if (N0.getMachineOpcode() == RISCV::SLLI && 3109 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 3110 break; 3111 3112 SDNode *Result = 3113 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 3114 N00, N01); 3115 ReplaceUses(N, Result); 3116 return true; 3117 } 3118 case RISCV::ADDW: 3119 case RISCV::ADDIW: 3120 case RISCV::SUBW: 3121 case RISCV::MULW: 3122 case RISCV::SLLIW: 3123 case RISCV::PACKW: 3124 case RISCV::TH_MULAW: 3125 case RISCV::TH_MULAH: 3126 case RISCV::TH_MULSW: 3127 case RISCV::TH_MULSH: 3128 // Result is already sign extended just remove the sext.w. 3129 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 3130 ReplaceUses(N, N0.getNode()); 3131 return true; 3132 } 3133 3134 return false; 3135 } 3136 3137 static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) { 3138 // Check that we're using V0 as a mask register. 3139 if (!isa<RegisterSDNode>(MaskOp) || 3140 cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0) 3141 return false; 3142 3143 // The glued user defines V0. 3144 const auto *Glued = GlueOp.getNode(); 3145 3146 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 3147 return false; 3148 3149 // Check that we're defining V0 as a mask register. 3150 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 3151 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 3152 return false; 3153 3154 // Check the instruction defining V0; it needs to be a VMSET pseudo. 3155 SDValue MaskSetter = Glued->getOperand(2); 3156 3157 const auto IsVMSet = [](unsigned Opc) { 3158 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 3159 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 3160 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 3161 Opc == RISCV::PseudoVMSET_M_B8; 3162 }; 3163 3164 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 3165 // undefined behaviour if it's the wrong bitwidth, so we could choose to 3166 // assume that it's all-ones? Same applies to its VL. 3167 return MaskSetter->isMachineOpcode() && 3168 IsVMSet(MaskSetter.getMachineOpcode()); 3169 } 3170 3171 // Return true if we can make sure mask of N is all-ones mask. 
3172 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { 3173 return usesAllOnesMask(N->getOperand(MaskOpIdx), 3174 N->getOperand(N->getNumOperands() - 1)); 3175 } 3176 3177 static bool isImplicitDef(SDValue V) { 3178 return V.isMachineOpcode() && 3179 V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; 3180 } 3181 3182 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 3183 // corresponding "unmasked" pseudo versions. The mask we're interested in will 3184 // take the form of a V0 physical register operand, with a glued 3185 // register-setting instruction. 3186 bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { 3187 const RISCV::RISCVMaskedPseudoInfo *I = 3188 RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); 3189 if (!I) 3190 return false; 3191 3192 unsigned MaskOpIdx = I->MaskOpIdx; 3193 if (!usesAllOnesMask(N, MaskOpIdx)) 3194 return false; 3195 3196 // There are two classes of pseudos in the table - compares and 3197 // everything else. See the comment on RISCVMaskedPseudo for details. 3198 const unsigned Opc = I->UnmaskedPseudo; 3199 const MCInstrDesc &MCID = TII->get(Opc); 3200 const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags); 3201 #ifndef NDEBUG 3202 const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode()); 3203 assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) == 3204 RISCVII::hasVecPolicyOp(MCID.TSFlags) && 3205 "Masked and unmasked pseudos are inconsistent"); 3206 const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID); 3207 assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure"); 3208 #endif 3209 3210 SmallVector<SDValue, 8> Ops; 3211 // Skip the merge operand at index 0 if !UseTUPseudo. 3212 for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) { 3213 // Skip the mask, and the Glue. 3214 SDValue Op = N->getOperand(I); 3215 if (I == MaskOpIdx || Op.getValueType() == MVT::Glue) 3216 continue; 3217 Ops.push_back(Op); 3218 } 3219 3220 // Transitively apply any node glued to our new node. 
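  // The CopyToReg that defined V0 is dropped together with the mask operand;
  // if something else was glued ahead of it, that glue is re-attached here so
  // it is not lost.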
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

static bool IsVMerge(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
         Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
         Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
         Opc == RISCV::PseudoVMERGE_VVM_M1 ||
         Opc == RISCV::PseudoVMERGE_VVM_M2 ||
         Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
}

static bool IsVMv(SDNode *N) {
  unsigned Opc = N->getMachineOpcode();
  return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
         Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
         Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
         Opc == RISCV::PseudoVMV_V_V_M8;
}

static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
  switch (LMUL) {
  case RISCVII::LMUL_F8:
    return RISCV::PseudoVMSET_M_B1;
  case RISCVII::LMUL_F4:
    return RISCV::PseudoVMSET_M_B2;
  case RISCVII::LMUL_F2:
    return RISCV::PseudoVMSET_M_B4;
  case RISCVII::LMUL_1:
    return RISCV::PseudoVMSET_M_B8;
  case RISCVII::LMUL_2:
    return RISCV::PseudoVMSET_M_B16;
  case RISCVII::LMUL_4:
    return RISCV::PseudoVMSET_M_B32;
  case RISCVII::LMUL_8:
    return RISCV::PseudoVMSET_M_B64;
  case RISCVII::LMUL_RESERVED:
    llvm_unreachable("Unexpected LMUL");
  }
  llvm_unreachable("Unknown VLMUL enum");
}

// Try to fold away VMERGE_VVM instructions. We handle these cases:
// - A masked TU VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TU instruction. VMERGE_VVM's merge operand must be the same as
//   its false operand.
// - A masked TA VMERGE_VVM combined with an unmasked TA instruction folds to
//   a masked TA instruction.
// - An unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds
//   to a masked TU instruction. Both instructions must have the same merge
//   operand, and VMERGE_VVM's merge operand must be the same as its false
//   operand.
// Note: The VMERGE_VVM forms above (TA and TU) refer to the policy implied,
// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo
// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
// form.
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  SDValue Merge, False, True, VL, Mask, Glue;
  // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
  if (IsVMv(N)) {
    Merge = N->getOperand(0);
    False = N->getOperand(0);
    True = N->getOperand(1);
    VL = N->getOperand(2);
    // A vmv.v.v won't have a Mask or Glue; instead we'll construct an
    // all-ones mask below.
  } else {
    assert(IsVMerge(N));
    Merge = N->getOperand(0);
    False = N->getOperand(1);
    True = N->getOperand(2);
    Mask = N->getOperand(3);
    VL = N->getOperand(4);
    // We always have a glue node for the mask at v0.
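    // The operand layout here is (merge, false, true, mask, vl, sew) followed
    // by that glue, so the glue is always the last operand.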
    Glue = N->getOperand(N->getNumOperands() - 1);
  }
  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
  assert(!Glue || Glue.getValueType() == MVT::Glue);

  // We require that either merge and false are the same, or that merge
  // is undefined.
  if (Merge != False && !isImplicitDef(Merge))
    return false;

  assert(True.getResNo() == 0 &&
         "Expected True to be the first output of an instruction.");

  // N must be the only user of True.
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();
  const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
  uint64_t TrueTSFlags = TrueMCID.TSFlags;
  bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);

  bool IsMasked = false;
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
  if (!Info && HasTiedDest) {
    Info = RISCV::getMaskedPseudoInfo(TrueOpc);
    IsMasked = true;
  }

  if (!Info)
    return false;

  if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
    // The vmerge instruction must be TU.
    // FIXME: This could be relaxed, but we need to handle the policy for the
    // resulting op correctly.
    if (isImplicitDef(Merge))
      return false;
    SDValue MergeOpTrue = True->getOperand(0);
    // Both the vmerge instruction and the True instruction must have the same
    // merge operand.
    if (False != MergeOpTrue)
      return false;
  }

  if (IsMasked) {
    assert(HasTiedDest && "Expected tied dest");
    // The vmerge instruction must be TU.
    if (isImplicitDef(Merge))
      return false;
    // The vmerge instruction must have an all-ones mask since we're going to
    // keep the mask from the True instruction.
    // FIXME: Support a mask-agnostic True instruction, which would have an
    // undef merge operand.
    if (Mask && !usesAllOnesMask(Mask, Glue))
      return false;
  }

  // Skip if True has side effects.
  // TODO: Support vleff and vlsegff.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // The last operand of a masked instruction may be glued.
  bool HasGlueOp = True->getGluedNode() != nullptr;

  // The chain operand may appear either immediately before the glue operand
  // or in the last position.
  unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
  bool HasChainOp =
      True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its chain.
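    // For example, if False were loaded via a chain that passes through True,
    // the folded node would both use False and take True's place, forming a
    // cycle.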
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    if (Mask)
      LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (Glue)
      LoopWorklist.push_back(Glue.getNode());
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // The vector policy operand may be present for masked intrinsics.
  bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
  unsigned TrueVLIndex =
      True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);
  SDValue SEW = True.getOperand(TrueVLIndex + 1);

  auto GetMinVL = [](SDValue LHS, SDValue RHS) {
    if (LHS == RHS)
      return LHS;
    if (isAllOnesConstant(LHS))
      return RHS;
    if (isAllOnesConstant(RHS))
      return LHS;
    auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
    auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
    if (!CLHS || !CRHS)
      return SDValue();
    return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
  };

  // Because N and True must have the same merge operand (or True's operand is
  // implicit_def), the "effective" body is the minimum of their VLs.
  SDValue OrigVL = VL;
  VL = GetMinVL(TrueVL, VL);
  if (!VL)
    return false;

  // If we end up changing the VL or mask of True, then we need to make sure it
  // doesn't raise any observable fp exceptions, since changing the active
  // elements will affect how fflags is set.
  if (TrueVL != VL || !IsMasked)
    if (mayRaiseFPException(True.getNode()) &&
        !True->getFlags().hasNoFPExcept())
      return false;

  SDLoc DL(N);

  // From the preconditions we checked above, we know the mask and thus glue
  // for the result node will be taken from True.
  if (IsMasked) {
    Mask = True->getOperand(Info->MaskOpIdx);
    Glue = True->getOperand(True->getNumOperands() - 1);
    assert(Glue.getValueType() == MVT::Glue);
  }
  // If the vmerge is actually a vmv.v.v, there is no mask operand; create an
  // all-ones mask to use.
  else if (IsVMv(N)) {
    unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
    unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
    ElementCount EC = N->getValueType(0).getVectorElementCount();
    MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);

    SDValue AllOnesMask =
        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                            RISCV::V0, AllOnesMask, SDValue());
    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
    Glue = MaskCopy.getValue(1);
  }

  unsigned MaskedOpc = Info->MaskedPseudo;
#ifndef NDEBUG
  const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
  assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
         "Expected instructions with a mask to have a policy operand.");
  assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
                                         MCOI::TIED_TO) == 0 &&
         "Expected instructions with a mask to have a tied dest.");
#endif

  // Use a tumu policy, relaxing it to tail agnostic provided that the merge
  // operand is undefined.
  //
  // However, if the VL became smaller than what the vmerge had originally,
  // then elements past VL that were previously in the vmerge's body will have
  // moved to the tail. In that case we always need to use tail undisturbed to
  // preserve them.
  bool MergeVLShrunk = VL != OrigVL;
  uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
                        ? RISCVII::TAIL_AGNOSTIC
                        : /*TUMU*/ 0;
  SDValue PolicyOp =
      CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);

  const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
  const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
  assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
  Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);

  Ops.push_back(Mask);

  // For an unmasked "VOp" with a rounding-mode operand, the operand layout is
  // (..., rm, vl) or (..., rm, vl, policy); its masked version is
  // (..., vm, rm, vl, policy). See the rounding-mode pseudos in
  // RISCVInstrInfoVPseudos.td.
  if (HasRoundingMode)
    Ops.push_back(True->getOperand(TrueVLIndex - 1));

  Ops.append({VL, SEW, PolicyOp});

  // The result node should take the chain operand from True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(TrueChainOpIdx));

  // Add the glue for the CopyToReg of mask->v0.
  Ops.push_back(Glue);

  SDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. the chain result.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  // Try to transform Result to an unmasked intrinsic.
  doPeepholeMaskedRVV(Result);
  return true;
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). This may reduce uses of VMSET.
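// With an all-ones mask every body element comes from the true operand, and
// the caller only performs this transform when the merge and false operands
// are identical, so the vmerge behaves like a vmv.v.v with false as the
// passthru.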
bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
#define CASE_VMERGE_TO_VMV(lmul)                                               \
  case RISCV::PseudoVMERGE_VVM_##lmul:                                         \
    NewOpc = RISCV::PseudoVMV_V_V_##lmul;                                      \
    break;
  unsigned NewOpc;
  switch (N->getMachineOpcode()) {
  default:
    llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
  CASE_VMERGE_TO_VMV(MF8)
  CASE_VMERGE_TO_VMV(MF4)
  CASE_VMERGE_TO_VMV(MF2)
  CASE_VMERGE_TO_VMV(M1)
  CASE_VMERGE_TO_VMV(M2)
  CASE_VMERGE_TO_VMV(M4)
  CASE_VMERGE_TO_VMV(M8)
  }

  if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
    return false;

  SDLoc DL(N);
  SDValue PolicyOp =
      CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
  SDNode *Result = CurDAG->getMachineNode(
      NewOpc, DL, N->getValueType(0),
      {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
       PolicyOp});
  ReplaceUses(N, Result);
  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
    if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
      MadeChange |= performVMergeToVMv(N);
  }
  return MadeChange;
}

// This pass converts a legalized DAG into a RISC-V-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}

char RISCVDAGToDAGISel::ID = 0;

INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)