1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #define GET_RISCVMaskedPseudosTable_IMPL 41 #include "RISCVGenSearchableTables.inc" 42 } // namespace RISCV 43 } // namespace llvm 44 45 void RISCVDAGToDAGISel::PreprocessISelDAG() { 46 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 47 48 bool MadeChange = false; 49 while (Position != CurDAG->allnodes_begin()) { 50 SDNode *N = &*--Position; 51 if (N->use_empty()) 52 continue; 53 54 SDValue Result; 55 switch (N->getOpcode()) { 56 case ISD::SPLAT_VECTOR: { 57 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 58 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 59 MVT VT = N->getSimpleValueType(0); 60 unsigned Opc = 61 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 62 SDLoc DL(N); 63 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 64 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), 65 N->getOperand(0), VL); 66 break; 67 } 68 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { 69 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 70 // load. Done after lowering and combining so that we have a chance to 71 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 72 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 73 MVT VT = N->getSimpleValueType(0); 74 SDValue Passthru = N->getOperand(0); 75 SDValue Lo = N->getOperand(1); 76 SDValue Hi = N->getOperand(2); 77 SDValue VL = N->getOperand(3); 78 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 79 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 80 "Unexpected VTs!"); 81 MachineFunction &MF = CurDAG->getMachineFunction(); 82 RISCVMachineFunctionInfo *FuncInfo = 83 MF.getInfo<RISCVMachineFunctionInfo>(); 84 SDLoc DL(N); 85 86 // We use the same frame index we use for moving two i32s into 64-bit FPR. 87 // This is an analogous operation. 
88 int FI = FuncInfo->getMoveF64FrameIndex(MF); 89 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 90 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 91 SDValue StackSlot = 92 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 93 94 SDValue Chain = CurDAG->getEntryNode(); 95 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 96 97 SDValue OffsetSlot = 98 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 99 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 100 Align(8)); 101 102 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 103 104 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 105 SDValue IntID = 106 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 107 SDValue Ops[] = {Chain, 108 IntID, 109 Passthru, 110 StackSlot, 111 CurDAG->getRegister(RISCV::X0, MVT::i64), 112 VL}; 113 114 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 115 MVT::i64, MPI, Align(8), 116 MachineMemOperand::MOLoad); 117 break; 118 } 119 } 120 121 if (Result) { 122 LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: "); 123 LLVM_DEBUG(N->dump(CurDAG)); 124 LLVM_DEBUG(dbgs() << "\nNew: "); 125 LLVM_DEBUG(Result->dump(CurDAG)); 126 LLVM_DEBUG(dbgs() << "\n"); 127 128 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 129 MadeChange = true; 130 } 131 } 132 133 if (MadeChange) 134 CurDAG->RemoveDeadNodes(); 135 } 136 137 void RISCVDAGToDAGISel::PostprocessISelDAG() { 138 HandleSDNode Dummy(CurDAG->getRoot()); 139 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 140 141 bool MadeChange = false; 142 while (Position != CurDAG->allnodes_begin()) { 143 SDNode *N = &*--Position; 144 // Skip dead nodes and any non-machine opcodes. 145 if (N->use_empty() || !N->isMachineOpcode()) 146 continue; 147 148 MadeChange |= doPeepholeSExtW(N); 149 MadeChange |= doPeepholeMaskedRVV(N); 150 } 151 152 CurDAG->setRoot(Dummy.getValue()); 153 154 if (MadeChange) 155 CurDAG->RemoveDeadNodes(); 156 } 157 158 static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 159 RISCVMatInt::InstSeq &Seq) { 160 SDNode *Result = nullptr; 161 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 162 for (RISCVMatInt::Inst &Inst : Seq) { 163 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); 164 switch (Inst.getOpndKind()) { 165 case RISCVMatInt::Imm: 166 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); 167 break; 168 case RISCVMatInt::RegX0: 169 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, 170 CurDAG->getRegister(RISCV::X0, VT)); 171 break; 172 case RISCVMatInt::RegReg: 173 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); 174 break; 175 case RISCVMatInt::RegImm: 176 Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); 177 break; 178 } 179 180 // Only the first instruction has X0 as its source. 
181 SrcReg = SDValue(Result, 0); 182 } 183 184 return Result; 185 } 186 187 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 188 int64_t Imm, const RISCVSubtarget &Subtarget) { 189 RISCVMatInt::InstSeq Seq = 190 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 191 192 return selectImmSeq(CurDAG, DL, VT, Seq); 193 } 194 195 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 196 unsigned NF, RISCVII::VLMUL LMUL) { 197 static const unsigned M1TupleRegClassIDs[] = { 198 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 199 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 200 RISCV::VRN8M1RegClassID}; 201 static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, 202 RISCV::VRN3M2RegClassID, 203 RISCV::VRN4M2RegClassID}; 204 205 assert(Regs.size() >= 2 && Regs.size() <= 8); 206 207 unsigned RegClassID; 208 unsigned SubReg0; 209 switch (LMUL) { 210 default: 211 llvm_unreachable("Invalid LMUL."); 212 case RISCVII::VLMUL::LMUL_F8: 213 case RISCVII::VLMUL::LMUL_F4: 214 case RISCVII::VLMUL::LMUL_F2: 215 case RISCVII::VLMUL::LMUL_1: 216 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, 217 "Unexpected subreg numbering"); 218 SubReg0 = RISCV::sub_vrm1_0; 219 RegClassID = M1TupleRegClassIDs[NF - 2]; 220 break; 221 case RISCVII::VLMUL::LMUL_2: 222 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, 223 "Unexpected subreg numbering"); 224 SubReg0 = RISCV::sub_vrm2_0; 225 RegClassID = M2TupleRegClassIDs[NF - 2]; 226 break; 227 case RISCVII::VLMUL::LMUL_4: 228 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, 229 "Unexpected subreg numbering"); 230 SubReg0 = RISCV::sub_vrm4_0; 231 RegClassID = RISCV::VRN2M4RegClassID; 232 break; 233 } 234 235 SDLoc DL(Regs[0]); 236 SmallVector<SDValue, 8> Ops; 237 238 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 239 240 for (unsigned I = 0; I < Regs.size(); ++I) { 241 Ops.push_back(Regs[I]); 242 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 243 } 244 SDNode *N = 245 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 246 return SDValue(N, 0); 247 } 248 249 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 250 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 251 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 252 bool IsLoad, MVT *IndexVT) { 253 SDValue Chain = Node->getOperand(0); 254 SDValue Glue; 255 256 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 257 258 if (IsStridedOrIndexed) { 259 Operands.push_back(Node->getOperand(CurOp++)); // Index. 260 if (IndexVT) 261 *IndexVT = Operands.back()->getSimpleValueType(0); 262 } 263 264 if (IsMasked) { 265 // Mask needs to be copied to V0. 266 SDValue Mask = Node->getOperand(CurOp++); 267 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 268 Glue = Chain.getValue(1); 269 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 270 } 271 SDValue VL; 272 selectVLOp(Node->getOperand(CurOp++), VL); 273 Operands.push_back(VL); 274 275 MVT XLenVT = Subtarget->getXLenVT(); 276 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 277 Operands.push_back(SEWOp); 278 279 // Masked load has the tail policy argument. 280 if (IsMasked && IsLoad) { 281 // Policy must be a constant. 
282 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 283 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 284 Operands.push_back(PolicyOp); 285 } 286 287 Operands.push_back(Chain); // Chain. 288 if (Glue) 289 Operands.push_back(Glue); 290 } 291 292 static bool isAllUndef(ArrayRef<SDValue> Values) { 293 return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); 294 } 295 296 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 297 bool IsStrided) { 298 SDLoc DL(Node); 299 unsigned NF = Node->getNumValues() - 1; 300 MVT VT = Node->getSimpleValueType(0); 301 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 302 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 303 304 unsigned CurOp = 2; 305 SmallVector<SDValue, 8> Operands; 306 307 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 308 Node->op_begin() + CurOp + NF); 309 bool IsTU = IsMasked || !isAllUndef(Regs); 310 if (IsTU) { 311 SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); 312 Operands.push_back(Merge); 313 } 314 CurOp += NF; 315 316 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 317 Operands, /*IsLoad=*/true); 318 319 const RISCV::VLSEGPseudo *P = 320 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 321 static_cast<unsigned>(LMUL)); 322 MachineSDNode *Load = 323 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 324 325 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 326 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 327 328 SDValue SuperReg = SDValue(Load, 0); 329 for (unsigned I = 0; I < NF; ++I) { 330 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 331 ReplaceUses(SDValue(Node, I), 332 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 333 } 334 335 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 336 CurDAG->RemoveDeadNode(Node); 337 } 338 339 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 340 SDLoc DL(Node); 341 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 
342 MVT VT = Node->getSimpleValueType(0); 343 MVT XLenVT = Subtarget->getXLenVT(); 344 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 345 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 346 347 unsigned CurOp = 2; 348 SmallVector<SDValue, 7> Operands; 349 350 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 351 Node->op_begin() + CurOp + NF); 352 bool IsTU = IsMasked || !isAllUndef(Regs); 353 if (IsTU) { 354 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 355 Operands.push_back(MaskedOff); 356 } 357 CurOp += NF; 358 359 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 360 /*IsStridedOrIndexed*/ false, Operands, 361 /*IsLoad=*/true); 362 363 const RISCV::VLSEGPseudo *P = 364 RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 365 Log2SEW, static_cast<unsigned>(LMUL)); 366 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 367 XLenVT, MVT::Other, Operands); 368 369 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 370 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 371 372 SDValue SuperReg = SDValue(Load, 0); 373 for (unsigned I = 0; I < NF; ++I) { 374 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 375 ReplaceUses(SDValue(Node, I), 376 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 377 } 378 379 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL 380 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain 381 CurDAG->RemoveDeadNode(Node); 382 } 383 384 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 385 bool IsOrdered) { 386 SDLoc DL(Node); 387 unsigned NF = Node->getNumValues() - 1; 388 MVT VT = Node->getSimpleValueType(0); 389 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 390 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 391 392 unsigned CurOp = 2; 393 SmallVector<SDValue, 8> Operands; 394 395 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 396 Node->op_begin() + CurOp + NF); 397 bool IsTU = IsMasked || !isAllUndef(Regs); 398 if (IsTU) { 399 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 400 Operands.push_back(MaskedOff); 401 } 402 CurOp += NF; 403 404 MVT IndexVT; 405 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 406 /*IsStridedOrIndexed*/ true, Operands, 407 /*IsLoad=*/true, &IndexVT); 408 409 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 410 "Element count mismatch"); 411 412 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 413 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 414 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 415 report_fatal_error("The V extension does not support EEW=64 for index " 416 "values when XLEN=32"); 417 } 418 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 419 NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 420 static_cast<unsigned>(IndexLMUL)); 421 MachineSDNode *Load = 422 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 423 424 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 425 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 426 427 SDValue SuperReg = SDValue(Load, 0); 428 for (unsigned I = 0; I < NF; ++I) { 429 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 430 ReplaceUses(SDValue(Node, I), 431 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 432 } 433 434 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 435 CurDAG->RemoveDeadNode(Node); 436 } 437 438 void 
RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 439 bool IsStrided) { 440 SDLoc DL(Node); 441 unsigned NF = Node->getNumOperands() - 4; 442 if (IsStrided) 443 NF--; 444 if (IsMasked) 445 NF--; 446 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 447 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 448 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 449 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 450 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 451 452 SmallVector<SDValue, 8> Operands; 453 Operands.push_back(StoreVal); 454 unsigned CurOp = 2 + NF; 455 456 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 457 Operands); 458 459 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 460 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 461 MachineSDNode *Store = 462 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 463 464 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 465 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 466 467 ReplaceNode(Node, Store); 468 } 469 470 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 471 bool IsOrdered) { 472 SDLoc DL(Node); 473 unsigned NF = Node->getNumOperands() - 5; 474 if (IsMasked) 475 --NF; 476 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 477 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 478 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 479 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 480 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 481 482 SmallVector<SDValue, 8> Operands; 483 Operands.push_back(StoreVal); 484 unsigned CurOp = 2 + NF; 485 486 MVT IndexVT; 487 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 488 /*IsStridedOrIndexed*/ true, Operands, 489 /*IsLoad=*/false, &IndexVT); 490 491 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 492 "Element count mismatch"); 493 494 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 495 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 496 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 497 report_fatal_error("The V extension does not support EEW=64 for index " 498 "values when XLEN=32"); 499 } 500 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 501 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 502 static_cast<unsigned>(IndexLMUL)); 503 MachineSDNode *Store = 504 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 505 506 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 507 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 508 509 ReplaceNode(Node, Store); 510 } 511 512 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 513 if (!Subtarget->hasVInstructions()) 514 return; 515 516 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 517 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 518 "Unexpected opcode"); 519 520 SDLoc DL(Node); 521 MVT XLenVT = Subtarget->getXLenVT(); 522 523 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 524 unsigned IntNoOffset = HasChain ? 
1 : 0; 525 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 526 527 assert((IntNo == Intrinsic::riscv_vsetvli || 528 IntNo == Intrinsic::riscv_vsetvlimax || 529 IntNo == Intrinsic::riscv_vsetvli_opt || 530 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 531 "Unexpected vsetvli intrinsic"); 532 533 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 534 IntNo == Intrinsic::riscv_vsetvlimax_opt; 535 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 536 537 assert(Node->getNumOperands() == Offset + 2 && 538 "Unexpected number of operands"); 539 540 unsigned SEW = 541 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 542 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 543 Node->getConstantOperandVal(Offset + 1) & 0x7); 544 545 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 546 /*MaskAgnostic*/ false); 547 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 548 549 SmallVector<EVT, 2> VTs = {XLenVT}; 550 if (HasChain) 551 VTs.push_back(MVT::Other); 552 553 SDValue VLOperand; 554 unsigned Opcode = RISCV::PseudoVSETVLI; 555 if (VLMax) { 556 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 557 Opcode = RISCV::PseudoVSETVLIX0; 558 } else { 559 VLOperand = Node->getOperand(IntNoOffset + 1); 560 561 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 562 uint64_t AVL = C->getZExtValue(); 563 if (isUInt<5>(AVL)) { 564 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 565 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 566 if (HasChain) 567 Ops.push_back(Node->getOperand(0)); 568 ReplaceNode( 569 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 570 return; 571 } 572 } 573 } 574 575 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 576 if (HasChain) 577 Ops.push_back(Node->getOperand(0)); 578 579 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 580 } 581 582 void RISCVDAGToDAGISel::Select(SDNode *Node) { 583 // If we have a custom node, we have already selected. 584 if (Node->isMachineOpcode()) { 585 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 586 Node->setNodeId(-1); 587 return; 588 } 589 590 // Instruction Selection not handled by the auto-generated tablegen selection 591 // should be handled here. 592 unsigned Opcode = Node->getOpcode(); 593 MVT XLenVT = Subtarget->getXLenVT(); 594 SDLoc DL(Node); 595 MVT VT = Node->getSimpleValueType(0); 596 597 switch (Opcode) { 598 case ISD::Constant: { 599 auto *ConstNode = cast<ConstantSDNode>(Node); 600 if (VT == XLenVT && ConstNode->isZero()) { 601 SDValue New = 602 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 603 ReplaceNode(Node, New.getNode()); 604 return; 605 } 606 int64_t Imm = ConstNode->getSExtValue(); 607 // If the upper XLen-16 bits are not used, try to convert this to a simm12 608 // by sign extending bit 15. 609 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 610 hasAllHUsers(Node)) 611 Imm = SignExtend64<16>(Imm); 612 // If the upper 32-bits are not used try to convert this into a simm32 by 613 // sign extending bit 32. 
614 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 615 Imm = SignExtend64<32>(Imm); 616 617 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); 618 return; 619 } 620 case ISD::SHL: { 621 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 622 if (!N1C) 623 break; 624 SDValue N0 = Node->getOperand(0); 625 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 626 !isa<ConstantSDNode>(N0.getOperand(1))) 627 break; 628 unsigned ShAmt = N1C->getZExtValue(); 629 uint64_t Mask = N0.getConstantOperandVal(1); 630 631 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has 632 // 32 leading zeros and C3 trailing zeros. 633 if (ShAmt <= 32 && isShiftedMask_64(Mask)) { 634 unsigned XLen = Subtarget->getXLen(); 635 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 636 unsigned TrailingZeros = countTrailingZeros(Mask); 637 if (TrailingZeros > 0 && LeadingZeros == 32) { 638 SDNode *SRLIW = CurDAG->getMachineNode( 639 RISCV::SRLIW, DL, VT, N0->getOperand(0), 640 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 641 SDNode *SLLI = CurDAG->getMachineNode( 642 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 643 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 644 ReplaceNode(Node, SLLI); 645 return; 646 } 647 } 648 break; 649 } 650 case ISD::SRL: { 651 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 652 if (!N1C) 653 break; 654 SDValue N0 = Node->getOperand(0); 655 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 656 !isa<ConstantSDNode>(N0.getOperand(1))) 657 break; 658 unsigned ShAmt = N1C->getZExtValue(); 659 uint64_t Mask = N0.getConstantOperandVal(1); 660 661 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 662 // 32 leading zeros and C3 trailing zeros. 663 if (isShiftedMask_64(Mask)) { 664 unsigned XLen = Subtarget->getXLen(); 665 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); 666 unsigned TrailingZeros = countTrailingZeros(Mask); 667 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 668 SDNode *SRLIW = CurDAG->getMachineNode( 669 RISCV::SRLIW, DL, VT, N0->getOperand(0), 670 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 671 SDNode *SLLI = CurDAG->getMachineNode( 672 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 673 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 674 ReplaceNode(Node, SLLI); 675 return; 676 } 677 } 678 679 // Optimize (srl (and X, C2), C) -> 680 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 681 // Where C2 is a mask with C3 trailing ones. 682 // Taking into account that the C2 may have had lower bits unset by 683 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 684 // This pattern occurs when type legalizing right shifts for types with 685 // less than XLen bits. 686 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 687 if (!isMask_64(Mask)) 688 break; 689 unsigned TrailingOnes = countTrailingOnes(Mask); 690 // 32 trailing ones should use srliw via tablegen pattern. 691 if (TrailingOnes == 32 || ShAmt >= TrailingOnes) 692 break; 693 // If C2 is (1 << ShAmt) use bexti if possible. 
694 if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) { 695 SDNode *BEXTI = 696 CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0), 697 CurDAG->getTargetConstant(ShAmt, DL, VT)); 698 ReplaceNode(Node, BEXTI); 699 return; 700 } 701 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 702 SDNode *SLLI = 703 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 704 CurDAG->getTargetConstant(LShAmt, DL, VT)); 705 SDNode *SRLI = CurDAG->getMachineNode( 706 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 707 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 708 ReplaceNode(Node, SRLI); 709 return; 710 } 711 case ISD::SRA: { 712 // Optimize (sra (sext_inreg X, i16), C) -> 713 // (srai (slli X, (XLen-16), (XLen-16) + C) 714 // And (sra (sext_inreg X, i8), C) -> 715 // (srai (slli X, (XLen-8), (XLen-8) + C) 716 // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 717 // This transform matches the code we get without Zbb. The shifts are more 718 // compressible, and this can help expose CSE opportunities in the sdiv by 719 // constant optimization. 720 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 721 if (!N1C) 722 break; 723 SDValue N0 = Node->getOperand(0); 724 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 725 break; 726 unsigned ShAmt = N1C->getZExtValue(); 727 unsigned ExtSize = 728 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 729 // ExtSize of 32 should use sraiw via tablegen pattern. 730 if (ExtSize >= 32 || ShAmt >= ExtSize) 731 break; 732 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 733 SDNode *SLLI = 734 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 735 CurDAG->getTargetConstant(LShAmt, DL, VT)); 736 SDNode *SRAI = CurDAG->getMachineNode( 737 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 738 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 739 ReplaceNode(Node, SRAI); 740 return; 741 } 742 case ISD::AND: { 743 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 744 if (!N1C) 745 break; 746 747 SDValue N0 = Node->getOperand(0); 748 749 bool LeftShift = N0.getOpcode() == ISD::SHL; 750 if (!LeftShift && N0.getOpcode() != ISD::SRL) 751 break; 752 753 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 754 if (!C) 755 break; 756 unsigned C2 = C->getZExtValue(); 757 unsigned XLen = Subtarget->getXLen(); 758 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 759 760 uint64_t C1 = N1C->getZExtValue(); 761 762 // Keep track of whether this is a c.andi. If we can't use c.andi, the 763 // shift pair might offer more compression opportunities. 764 // TODO: We could check for C extension here, but we don't have many lit 765 // tests with the C extension enabled so not checking gets better coverage. 766 // TODO: What if ANDI faster than shift? 767 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 768 769 // Clear irrelevant bits in the mask. 770 if (LeftShift) 771 C1 &= maskTrailingZeros<uint64_t>(C2); 772 else 773 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 774 775 // Some transforms should only be done if the shift has a single use or 776 // the AND would become (srli (slli X, 32), 32) 777 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 778 779 SDValue X = N0.getOperand(0); 780 781 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 782 // with c3 leading zeros. 
783 if (!LeftShift && isMask_64(C1)) { 784 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 785 if (C2 < Leading) { 786 // If the number of leading zeros is C2+32 this can be SRLIW. 787 if (C2 + 32 == Leading) { 788 SDNode *SRLIW = CurDAG->getMachineNode( 789 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 790 ReplaceNode(Node, SRLIW); 791 return; 792 } 793 794 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 795 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 796 // 797 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 798 // legalized and goes through DAG combine. 799 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 800 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 801 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 802 SDNode *SRAIW = 803 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 804 CurDAG->getTargetConstant(31, DL, VT)); 805 SDNode *SRLIW = CurDAG->getMachineNode( 806 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 807 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 808 ReplaceNode(Node, SRLIW); 809 return; 810 } 811 812 // (srli (slli x, c3-c2), c3). 813 // Skip if we could use (zext.w (sraiw X, C2)). 814 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 815 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 816 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 817 // Also Skip if we can use bexti. 818 Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1; 819 if (OneUseOrZExtW && !Skip) { 820 SDNode *SLLI = CurDAG->getMachineNode( 821 RISCV::SLLI, DL, VT, X, 822 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 823 SDNode *SRLI = CurDAG->getMachineNode( 824 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 825 CurDAG->getTargetConstant(Leading, DL, VT)); 826 ReplaceNode(Node, SRLI); 827 return; 828 } 829 } 830 } 831 832 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 833 // shifted by c2 bits with c3 leading zeros. 834 if (LeftShift && isShiftedMask_64(C1)) { 835 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 836 837 if (C2 + Leading < XLen && 838 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 839 // Use slli.uw when possible. 840 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 841 SDNode *SLLI_UW = CurDAG->getMachineNode( 842 RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 843 ReplaceNode(Node, SLLI_UW); 844 return; 845 } 846 847 // (srli (slli c2+c3), c3) 848 if (OneUseOrZExtW && !IsCANDI) { 849 SDNode *SLLI = CurDAG->getMachineNode( 850 RISCV::SLLI, DL, VT, X, 851 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 852 SDNode *SRLI = CurDAG->getMachineNode( 853 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 854 CurDAG->getTargetConstant(Leading, DL, VT)); 855 ReplaceNode(Node, SRLI); 856 return; 857 } 858 } 859 } 860 861 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 862 // shifted mask with c2 leading zeros and c3 trailing zeros. 863 if (!LeftShift && isShiftedMask_64(C1)) { 864 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 865 unsigned Trailing = countTrailingZeros(C1); 866 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && !IsCANDI) { 867 unsigned SrliOpc = RISCV::SRLI; 868 // If the input is zexti32 we should use SRLIW. 
869 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && 870 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 871 SrliOpc = RISCV::SRLIW; 872 X = X.getOperand(0); 873 } 874 SDNode *SRLI = CurDAG->getMachineNode( 875 SrliOpc, DL, VT, X, 876 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 877 SDNode *SLLI = 878 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 879 CurDAG->getTargetConstant(Trailing, DL, VT)); 880 ReplaceNode(Node, SLLI); 881 return; 882 } 883 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 884 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 885 OneUseOrZExtW && !IsCANDI) { 886 SDNode *SRLIW = CurDAG->getMachineNode( 887 RISCV::SRLIW, DL, VT, X, 888 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 889 SDNode *SLLI = 890 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 891 CurDAG->getTargetConstant(Trailing, DL, VT)); 892 ReplaceNode(Node, SLLI); 893 return; 894 } 895 } 896 897 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 898 // shifted mask with no leading zeros and c3 trailing zeros. 899 if (LeftShift && isShiftedMask_64(C1)) { 900 unsigned Leading = XLen - (64 - countLeadingZeros(C1)); 901 unsigned Trailing = countTrailingZeros(C1); 902 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 903 SDNode *SRLI = CurDAG->getMachineNode( 904 RISCV::SRLI, DL, VT, X, 905 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 906 SDNode *SLLI = 907 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 908 CurDAG->getTargetConstant(Trailing, DL, VT)); 909 ReplaceNode(Node, SLLI); 910 return; 911 } 912 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 913 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 914 SDNode *SRLIW = CurDAG->getMachineNode( 915 RISCV::SRLIW, DL, VT, X, 916 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 917 SDNode *SLLI = 918 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 919 CurDAG->getTargetConstant(Trailing, DL, VT)); 920 ReplaceNode(Node, SLLI); 921 return; 922 } 923 } 924 925 break; 926 } 927 case ISD::MUL: { 928 // Special case for calculating (mul (and X, C2), C1) where the full product 929 // fits in XLen bits. We can shift X left by the number of leading zeros in 930 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 931 // product has XLen trailing zeros, putting it in the output of MULHU. This 932 // can avoid materializing a constant in a register for C2. 933 934 // RHS should be a constant. 935 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 936 if (!N1C || !N1C->hasOneUse()) 937 break; 938 939 // LHS should be an AND with constant. 940 SDValue N0 = Node->getOperand(0); 941 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 942 break; 943 944 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 945 946 // Constant should be a mask. 947 if (!isMask_64(C2)) 948 break; 949 950 // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT 951 // has multiple users or the constant is a simm12. This prevents inserting 952 // a shift and still have uses of the AND/ZEXT. Shifting a simm12 will 953 // likely make it more costly to materialize. Otherwise, using a SLLI 954 // might allow it to be compressed. 
955 bool IsANDIOrZExt = 956 isInt<12>(C2) || 957 (C2 == UINT64_C(0xFFFF) && 958 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 959 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()); 960 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) 961 break; 962 963 // We need to shift left the AND input and C1 by a total of XLen bits. 964 965 // How far left do we need to shift the AND input? 966 unsigned XLen = Subtarget->getXLen(); 967 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 968 969 // The constant gets shifted by the remaining amount unless that would 970 // shift bits out. 971 uint64_t C1 = N1C->getZExtValue(); 972 unsigned ConstantShift = XLen - LeadingZeros; 973 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 974 break; 975 976 uint64_t ShiftedC1 = C1 << ConstantShift; 977 // If this RV32, we need to sign extend the constant. 978 if (XLen == 32) 979 ShiftedC1 = SignExtend64<32>(ShiftedC1); 980 981 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 982 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 983 SDNode *SLLI = 984 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 985 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 986 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 987 SDValue(SLLI, 0), SDValue(Imm, 0)); 988 ReplaceNode(Node, MULHU); 989 return; 990 } 991 case ISD::INTRINSIC_WO_CHAIN: { 992 unsigned IntNo = Node->getConstantOperandVal(0); 993 switch (IntNo) { 994 // By default we do not custom select any intrinsic. 995 default: 996 break; 997 case Intrinsic::riscv_vmsgeu: 998 case Intrinsic::riscv_vmsge: { 999 SDValue Src1 = Node->getOperand(1); 1000 SDValue Src2 = Node->getOperand(2); 1001 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1002 bool IsCmpUnsignedZero = false; 1003 // Only custom select scalar second operand. 1004 if (Src2.getValueType() != XLenVT) 1005 break; 1006 // Small constants are handled with patterns. 1007 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1008 int64_t CVal = C->getSExtValue(); 1009 if (CVal >= -15 && CVal <= 16) { 1010 if (!IsUnsigned || CVal != 0) 1011 break; 1012 IsCmpUnsignedZero = true; 1013 } 1014 } 1015 MVT Src1VT = Src1.getSimpleValueType(); 1016 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 1017 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1018 default: 1019 llvm_unreachable("Unexpected LMUL!"); 1020 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1021 case RISCVII::VLMUL::lmulenum: \ 1022 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1023 : RISCV::PseudoVMSLT_VX_##suffix; \ 1024 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1025 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1026 break; 1027 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 1028 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 1029 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 1030 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 1031 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 1032 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 1033 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 1034 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 1035 } 1036 SDValue SEW = CurDAG->getTargetConstant( 1037 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1038 SDValue VL; 1039 selectVLOp(Node->getOperand(3), VL); 1040 1041 // If vmsgeu with 0 immediate, expand it to vmset. 
1042 if (IsCmpUnsignedZero) { 1043 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 1044 return; 1045 } 1046 1047 // Expand to 1048 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1049 SDValue Cmp = SDValue( 1050 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1051 0); 1052 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1053 {Cmp, Cmp, VL, SEW})); 1054 return; 1055 } 1056 case Intrinsic::riscv_vmsgeu_mask: 1057 case Intrinsic::riscv_vmsge_mask: { 1058 SDValue Src1 = Node->getOperand(2); 1059 SDValue Src2 = Node->getOperand(3); 1060 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1061 bool IsCmpUnsignedZero = false; 1062 // Only custom select scalar second operand. 1063 if (Src2.getValueType() != XLenVT) 1064 break; 1065 // Small constants are handled with patterns. 1066 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1067 int64_t CVal = C->getSExtValue(); 1068 if (CVal >= -15 && CVal <= 16) { 1069 if (!IsUnsigned || CVal != 0) 1070 break; 1071 IsCmpUnsignedZero = true; 1072 } 1073 } 1074 MVT Src1VT = Src1.getSimpleValueType(); 1075 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1076 VMOROpcode; 1077 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1078 default: 1079 llvm_unreachable("Unexpected LMUL!"); 1080 #define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ 1081 case RISCVII::VLMUL::lmulenum: \ 1082 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1083 : RISCV::PseudoVMSLT_VX_##suffix; \ 1084 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1085 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1086 break; 1087 CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) 1088 CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) 1089 CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) 1090 CASE_VMSLT_OPCODES(LMUL_1, M1, B8) 1091 CASE_VMSLT_OPCODES(LMUL_2, M2, B16) 1092 CASE_VMSLT_OPCODES(LMUL_4, M4, B32) 1093 CASE_VMSLT_OPCODES(LMUL_8, M8, B64) 1094 #undef CASE_VMSLT_OPCODES 1095 } 1096 // Mask operations use the LMUL from the mask type. 1097 switch (RISCVTargetLowering::getLMUL(VT)) { 1098 default: 1099 llvm_unreachable("Unexpected LMUL!"); 1100 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 1101 case RISCVII::VLMUL::lmulenum: \ 1102 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1103 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1104 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 1105 break; 1106 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) 1107 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) 1108 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) 1109 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) 1110 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) 1111 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) 1112 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) 1113 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 1114 } 1115 SDValue SEW = CurDAG->getTargetConstant( 1116 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1117 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1118 SDValue VL; 1119 selectVLOp(Node->getOperand(5), VL); 1120 SDValue MaskedOff = Node->getOperand(1); 1121 SDValue Mask = Node->getOperand(4); 1122 1123 // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. 1124 if (IsCmpUnsignedZero) { 1125 // We don't need vmor if the MaskedOff and the Mask are the same 1126 // value. 
1127 if (Mask == MaskedOff) { 1128 ReplaceUses(Node, Mask.getNode()); 1129 return; 1130 } 1131 ReplaceNode(Node, 1132 CurDAG->getMachineNode(VMOROpcode, DL, VT, 1133 {Mask, MaskedOff, VL, MaskSEW})); 1134 return; 1135 } 1136 1137 // If the MaskedOff value and the Mask are the same value use 1138 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1139 // This avoids needing to copy v0 to vd before starting the next sequence. 1140 if (Mask == MaskedOff) { 1141 SDValue Cmp = SDValue( 1142 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1143 0); 1144 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1145 {Mask, Cmp, VL, MaskSEW})); 1146 return; 1147 } 1148 1149 // Mask needs to be copied to V0. 1150 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1151 RISCV::V0, Mask, SDValue()); 1152 SDValue Glue = Chain.getValue(1); 1153 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1154 1155 // Otherwise use 1156 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1157 // The result is mask undisturbed. 1158 // We use the same instructions to emulate mask agnostic behavior, because 1159 // the agnostic result can be either undisturbed or all 1. 1160 SDValue Cmp = SDValue( 1161 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1162 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1163 0); 1164 // vmxor.mm vd, vd, v0 is used to update active value. 1165 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1166 {Cmp, Mask, VL, MaskSEW})); 1167 return; 1168 } 1169 case Intrinsic::riscv_vsetvli_opt: 1170 case Intrinsic::riscv_vsetvlimax_opt: 1171 return selectVSETVLI(Node); 1172 } 1173 break; 1174 } 1175 case ISD::INTRINSIC_W_CHAIN: { 1176 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1177 switch (IntNo) { 1178 // By default we do not custom select any intrinsic. 
1179 default: 1180 break; 1181 case Intrinsic::riscv_vsetvli: 1182 case Intrinsic::riscv_vsetvlimax: 1183 return selectVSETVLI(Node); 1184 case Intrinsic::riscv_vlseg2: 1185 case Intrinsic::riscv_vlseg3: 1186 case Intrinsic::riscv_vlseg4: 1187 case Intrinsic::riscv_vlseg5: 1188 case Intrinsic::riscv_vlseg6: 1189 case Intrinsic::riscv_vlseg7: 1190 case Intrinsic::riscv_vlseg8: { 1191 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1192 return; 1193 } 1194 case Intrinsic::riscv_vlseg2_mask: 1195 case Intrinsic::riscv_vlseg3_mask: 1196 case Intrinsic::riscv_vlseg4_mask: 1197 case Intrinsic::riscv_vlseg5_mask: 1198 case Intrinsic::riscv_vlseg6_mask: 1199 case Intrinsic::riscv_vlseg7_mask: 1200 case Intrinsic::riscv_vlseg8_mask: { 1201 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1202 return; 1203 } 1204 case Intrinsic::riscv_vlsseg2: 1205 case Intrinsic::riscv_vlsseg3: 1206 case Intrinsic::riscv_vlsseg4: 1207 case Intrinsic::riscv_vlsseg5: 1208 case Intrinsic::riscv_vlsseg6: 1209 case Intrinsic::riscv_vlsseg7: 1210 case Intrinsic::riscv_vlsseg8: { 1211 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1212 return; 1213 } 1214 case Intrinsic::riscv_vlsseg2_mask: 1215 case Intrinsic::riscv_vlsseg3_mask: 1216 case Intrinsic::riscv_vlsseg4_mask: 1217 case Intrinsic::riscv_vlsseg5_mask: 1218 case Intrinsic::riscv_vlsseg6_mask: 1219 case Intrinsic::riscv_vlsseg7_mask: 1220 case Intrinsic::riscv_vlsseg8_mask: { 1221 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1222 return; 1223 } 1224 case Intrinsic::riscv_vloxseg2: 1225 case Intrinsic::riscv_vloxseg3: 1226 case Intrinsic::riscv_vloxseg4: 1227 case Intrinsic::riscv_vloxseg5: 1228 case Intrinsic::riscv_vloxseg6: 1229 case Intrinsic::riscv_vloxseg7: 1230 case Intrinsic::riscv_vloxseg8: 1231 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1232 return; 1233 case Intrinsic::riscv_vluxseg2: 1234 case Intrinsic::riscv_vluxseg3: 1235 case Intrinsic::riscv_vluxseg4: 1236 case Intrinsic::riscv_vluxseg5: 1237 case Intrinsic::riscv_vluxseg6: 1238 case Intrinsic::riscv_vluxseg7: 1239 case Intrinsic::riscv_vluxseg8: 1240 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1241 return; 1242 case Intrinsic::riscv_vloxseg2_mask: 1243 case Intrinsic::riscv_vloxseg3_mask: 1244 case Intrinsic::riscv_vloxseg4_mask: 1245 case Intrinsic::riscv_vloxseg5_mask: 1246 case Intrinsic::riscv_vloxseg6_mask: 1247 case Intrinsic::riscv_vloxseg7_mask: 1248 case Intrinsic::riscv_vloxseg8_mask: 1249 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1250 return; 1251 case Intrinsic::riscv_vluxseg2_mask: 1252 case Intrinsic::riscv_vluxseg3_mask: 1253 case Intrinsic::riscv_vluxseg4_mask: 1254 case Intrinsic::riscv_vluxseg5_mask: 1255 case Intrinsic::riscv_vluxseg6_mask: 1256 case Intrinsic::riscv_vluxseg7_mask: 1257 case Intrinsic::riscv_vluxseg8_mask: 1258 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1259 return; 1260 case Intrinsic::riscv_vlseg8ff: 1261 case Intrinsic::riscv_vlseg7ff: 1262 case Intrinsic::riscv_vlseg6ff: 1263 case Intrinsic::riscv_vlseg5ff: 1264 case Intrinsic::riscv_vlseg4ff: 1265 case Intrinsic::riscv_vlseg3ff: 1266 case Intrinsic::riscv_vlseg2ff: { 1267 selectVLSEGFF(Node, /*IsMasked*/ false); 1268 return; 1269 } 1270 case Intrinsic::riscv_vlseg8ff_mask: 1271 case Intrinsic::riscv_vlseg7ff_mask: 1272 case Intrinsic::riscv_vlseg6ff_mask: 1273 case Intrinsic::riscv_vlseg5ff_mask: 1274 case Intrinsic::riscv_vlseg4ff_mask: 1275 case Intrinsic::riscv_vlseg3ff_mask: 1276 case 
Intrinsic::riscv_vlseg2ff_mask: { 1277 selectVLSEGFF(Node, /*IsMasked*/ true); 1278 return; 1279 } 1280 case Intrinsic::riscv_vloxei: 1281 case Intrinsic::riscv_vloxei_mask: 1282 case Intrinsic::riscv_vluxei: 1283 case Intrinsic::riscv_vluxei_mask: { 1284 bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask || 1285 IntNo == Intrinsic::riscv_vluxei_mask; 1286 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 1287 IntNo == Intrinsic::riscv_vloxei_mask; 1288 1289 MVT VT = Node->getSimpleValueType(0); 1290 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1291 1292 unsigned CurOp = 2; 1293 // Masked intrinsic only have TU version pseduo instructions. 1294 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1295 SmallVector<SDValue, 8> Operands; 1296 if (IsTU) 1297 Operands.push_back(Node->getOperand(CurOp++)); 1298 else 1299 // Skip the undef passthru operand for nomask TA version pseudo 1300 CurOp++; 1301 1302 MVT IndexVT; 1303 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1304 /*IsStridedOrIndexed*/ true, Operands, 1305 /*IsLoad=*/true, &IndexVT); 1306 1307 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1308 "Element count mismatch"); 1309 1310 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1311 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1312 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1313 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1314 report_fatal_error("The V extension does not support EEW=64 for index " 1315 "values when XLEN=32"); 1316 } 1317 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 1318 IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 1319 static_cast<unsigned>(IndexLMUL)); 1320 MachineSDNode *Load = 1321 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1322 1323 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1324 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1325 1326 ReplaceNode(Node, Load); 1327 return; 1328 } 1329 case Intrinsic::riscv_vlm: 1330 case Intrinsic::riscv_vle: 1331 case Intrinsic::riscv_vle_mask: 1332 case Intrinsic::riscv_vlse: 1333 case Intrinsic::riscv_vlse_mask: { 1334 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 1335 IntNo == Intrinsic::riscv_vlse_mask; 1336 bool IsStrided = 1337 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 1338 1339 MVT VT = Node->getSimpleValueType(0); 1340 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1341 1342 unsigned CurOp = 2; 1343 // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. 1344 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 1345 // Masked intrinsic only have TU version pseduo instructions. 
1346 bool IsTU = HasPassthruOperand && 1347 (IsMasked || !Node->getOperand(CurOp).isUndef()); 1348 SmallVector<SDValue, 8> Operands; 1349 if (IsTU) 1350 Operands.push_back(Node->getOperand(CurOp++)); 1351 else if (HasPassthruOperand) 1352 // Skip the undef passthru operand for nomask TA version pseudo 1353 CurOp++; 1354 1355 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1356 Operands, /*IsLoad=*/true); 1357 1358 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1359 const RISCV::VLEPseudo *P = 1360 RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, 1361 static_cast<unsigned>(LMUL)); 1362 MachineSDNode *Load = 1363 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1364 1365 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1366 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1367 1368 ReplaceNode(Node, Load); 1369 return; 1370 } 1371 case Intrinsic::riscv_vleff: 1372 case Intrinsic::riscv_vleff_mask: { 1373 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 1374 1375 MVT VT = Node->getSimpleValueType(0); 1376 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1377 1378 unsigned CurOp = 2; 1379 // Masked intrinsic only have TU version pseduo instructions. 1380 bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); 1381 SmallVector<SDValue, 7> Operands; 1382 if (IsTU) 1383 Operands.push_back(Node->getOperand(CurOp++)); 1384 else 1385 // Skip the undef passthru operand for nomask TA version pseudo 1386 CurOp++; 1387 1388 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1389 /*IsStridedOrIndexed*/ false, Operands, 1390 /*IsLoad=*/true); 1391 1392 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1393 const RISCV::VLEPseudo *P = 1394 RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, 1395 Log2SEW, static_cast<unsigned>(LMUL)); 1396 MachineSDNode *Load = CurDAG->getMachineNode( 1397 P->Pseudo, DL, Node->getVTList(), Operands); 1398 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1399 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1400 1401 ReplaceNode(Node, Load); 1402 return; 1403 } 1404 } 1405 break; 1406 } 1407 case ISD::INTRINSIC_VOID: { 1408 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1409 switch (IntNo) { 1410 case Intrinsic::riscv_vsseg2: 1411 case Intrinsic::riscv_vsseg3: 1412 case Intrinsic::riscv_vsseg4: 1413 case Intrinsic::riscv_vsseg5: 1414 case Intrinsic::riscv_vsseg6: 1415 case Intrinsic::riscv_vsseg7: 1416 case Intrinsic::riscv_vsseg8: { 1417 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1418 return; 1419 } 1420 case Intrinsic::riscv_vsseg2_mask: 1421 case Intrinsic::riscv_vsseg3_mask: 1422 case Intrinsic::riscv_vsseg4_mask: 1423 case Intrinsic::riscv_vsseg5_mask: 1424 case Intrinsic::riscv_vsseg6_mask: 1425 case Intrinsic::riscv_vsseg7_mask: 1426 case Intrinsic::riscv_vsseg8_mask: { 1427 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1428 return; 1429 } 1430 case Intrinsic::riscv_vssseg2: 1431 case Intrinsic::riscv_vssseg3: 1432 case Intrinsic::riscv_vssseg4: 1433 case Intrinsic::riscv_vssseg5: 1434 case Intrinsic::riscv_vssseg6: 1435 case Intrinsic::riscv_vssseg7: 1436 case Intrinsic::riscv_vssseg8: { 1437 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1438 return; 1439 } 1440 case Intrinsic::riscv_vssseg2_mask: 1441 case Intrinsic::riscv_vssseg3_mask: 1442 case Intrinsic::riscv_vssseg4_mask: 1443 case Intrinsic::riscv_vssseg5_mask: 1444 case Intrinsic::riscv_vssseg6_mask: 1445 case 
Intrinsic::riscv_vssseg7_mask: 1446 case Intrinsic::riscv_vssseg8_mask: { 1447 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1448 return; 1449 } 1450 case Intrinsic::riscv_vsoxseg2: 1451 case Intrinsic::riscv_vsoxseg3: 1452 case Intrinsic::riscv_vsoxseg4: 1453 case Intrinsic::riscv_vsoxseg5: 1454 case Intrinsic::riscv_vsoxseg6: 1455 case Intrinsic::riscv_vsoxseg7: 1456 case Intrinsic::riscv_vsoxseg8: 1457 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1458 return; 1459 case Intrinsic::riscv_vsuxseg2: 1460 case Intrinsic::riscv_vsuxseg3: 1461 case Intrinsic::riscv_vsuxseg4: 1462 case Intrinsic::riscv_vsuxseg5: 1463 case Intrinsic::riscv_vsuxseg6: 1464 case Intrinsic::riscv_vsuxseg7: 1465 case Intrinsic::riscv_vsuxseg8: 1466 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1467 return; 1468 case Intrinsic::riscv_vsoxseg2_mask: 1469 case Intrinsic::riscv_vsoxseg3_mask: 1470 case Intrinsic::riscv_vsoxseg4_mask: 1471 case Intrinsic::riscv_vsoxseg5_mask: 1472 case Intrinsic::riscv_vsoxseg6_mask: 1473 case Intrinsic::riscv_vsoxseg7_mask: 1474 case Intrinsic::riscv_vsoxseg8_mask: 1475 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1476 return; 1477 case Intrinsic::riscv_vsuxseg2_mask: 1478 case Intrinsic::riscv_vsuxseg3_mask: 1479 case Intrinsic::riscv_vsuxseg4_mask: 1480 case Intrinsic::riscv_vsuxseg5_mask: 1481 case Intrinsic::riscv_vsuxseg6_mask: 1482 case Intrinsic::riscv_vsuxseg7_mask: 1483 case Intrinsic::riscv_vsuxseg8_mask: 1484 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1485 return; 1486 case Intrinsic::riscv_vsoxei: 1487 case Intrinsic::riscv_vsoxei_mask: 1488 case Intrinsic::riscv_vsuxei: 1489 case Intrinsic::riscv_vsuxei_mask: { 1490 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1491 IntNo == Intrinsic::riscv_vsuxei_mask; 1492 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1493 IntNo == Intrinsic::riscv_vsoxei_mask; 1494 1495 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1496 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1497 1498 unsigned CurOp = 2; 1499 SmallVector<SDValue, 8> Operands; 1500 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1501 1502 MVT IndexVT; 1503 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1504 /*IsStridedOrIndexed*/ true, Operands, 1505 /*IsLoad=*/false, &IndexVT); 1506 1507 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1508 "Element count mismatch"); 1509 1510 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1511 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1512 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1513 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1514 report_fatal_error("The V extension does not support EEW=64 for index " 1515 "values when XLEN=32"); 1516 } 1517 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1518 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1519 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1520 MachineSDNode *Store = 1521 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1522 1523 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1524 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1525 1526 ReplaceNode(Node, Store); 1527 return; 1528 } 1529 case Intrinsic::riscv_vsm: 1530 case Intrinsic::riscv_vse: 1531 case Intrinsic::riscv_vse_mask: 1532 case Intrinsic::riscv_vsse: 1533 case Intrinsic::riscv_vsse_mask: { 1534 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1535 IntNo == Intrinsic::riscv_vsse_mask; 1536 bool IsStrided = 1537 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1538 1539 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1540 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1541 1542 unsigned CurOp = 2; 1543 SmallVector<SDValue, 8> Operands; 1544 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1545 1546 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1547 Operands); 1548 1549 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1550 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1551 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1552 MachineSDNode *Store = 1553 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1554 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1555 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1556 1557 ReplaceNode(Node, Store); 1558 return; 1559 } 1560 } 1561 break; 1562 } 1563 case ISD::BITCAST: { 1564 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1565 // Just drop bitcasts between vectors if both are fixed or both are 1566 // scalable. 1567 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1568 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1569 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1570 CurDAG->RemoveDeadNode(Node); 1571 return; 1572 } 1573 break; 1574 } 1575 case ISD::INSERT_SUBVECTOR: { 1576 SDValue V = Node->getOperand(0); 1577 SDValue SubV = Node->getOperand(1); 1578 SDLoc DL(SubV); 1579 auto Idx = Node->getConstantOperandVal(2); 1580 MVT SubVecVT = SubV.getSimpleValueType(); 1581 1582 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1583 MVT SubVecContainerVT = SubVecVT; 1584 // Establish the correct scalable-vector types for any fixed-length type. 
    if (SubVecVT.isFixedLengthVector())
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
    if (VT.isFixedLengthVector())
      VT = TLI.getContainerForFixedLengthVector(VT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            VT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // insert which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT);
    bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
                           SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
    (void)IsSubVecPartReg; // Silence unused variable warning without asserts.
    assert((!IsSubVecPartReg || V.isUndef()) &&
           "Expecting lowering to have created legal INSERT_SUBVECTORs when "
           "the subvector is smaller than a full-sized register");

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy.
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                               DL, VT, SubV, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV);
    ReplaceNode(Node, Insert.getNode());
    return;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    SDValue V = Node->getOperand(0);
    auto Idx = Node->getConstantOperandVal(1);
    MVT InVT = V.getSimpleValueType();
    SDLoc DL(V);

    const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering();
    MVT SubVecContainerVT = VT;
    // Establish the correct scalable-vector types for any fixed-length type.
    if (VT.isFixedLengthVector())
      SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT);
    if (InVT.isFixedLengthVector())
      InVT = TLI.getContainerForFixedLengthVector(InVT);

    const auto *TRI = Subtarget->getRegisterInfo();
    unsigned SubRegIdx;
    std::tie(SubRegIdx, Idx) =
        RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
            InVT, SubVecContainerVT, Idx, TRI);

    // If the Idx hasn't been completely eliminated then this is a subvector
    // extract which doesn't naturally align to a vector register. These must
    // be handled using instructions to manipulate the vector registers.
    if (Idx != 0)
      break;

    // If we haven't set a SubRegIdx, then we must be going between
    // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy.
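    // When the LMUL groups differ, SubRegIdx names the slice of the larger
    // register group being read; for example, extracting an LMUL=1 vector
    // from the start of an LMUL=4 group resolves to a sub_vrm1_* style
    // subregister index rather than a full copy.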
    if (SubRegIdx == RISCV::NoSubRegister) {
      unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT);
      assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) ==
                 InRegClassID &&
             "Unexpected subvector extraction");
      SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT);
      SDNode *NewNode =
          CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
      ReplaceNode(Node, NewNode);
      return;
    }

    SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V);
    ReplaceNode(Node, Extract.getNode());
    return;
  }
  case ISD::SPLAT_VECTOR:
  case RISCVISD::VMV_S_X_VL:
  case RISCVISD::VFMV_S_F_VL:
  case RISCVISD::VMV_V_X_VL:
  case RISCVISD::VFMV_V_F_VL: {
    // Try to match splat of a scalar load to a strided load with stride of x0.
    bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
                        Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
    bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
    if (HasPassthruOperand && !Node->getOperand(0).isUndef())
      break;
    SDValue Src = HasPassthruOperand ? Node->getOperand(1)
                                     : Node->getOperand(0);
    auto *Ld = dyn_cast<LoadSDNode>(Src);
    if (!Ld)
      break;
    EVT MemVT = Ld->getMemoryVT();
    // The memory VT should be the same size as the element type.
    if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize())
      break;
    if (!IsProfitableToFold(Src, Node, Node) ||
        !IsLegalToFold(Src, Node, Node, TM.getOptLevel()))
      break;

    SDValue VL;
    if (Node->getOpcode() == ISD::SPLAT_VECTOR)
      VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
    else if (IsScalarMove) {
      // We could deal with more VL if we update the VSETVLI insert pass to
      // avoid introducing more VSETVLI.
      if (!isOneConstant(Node->getOperand(2)))
        break;
      selectVLOp(Node->getOperand(2), VL);
    } else
      selectVLOp(Node->getOperand(2), VL);

    unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
    SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);

    SDValue Operands[] = {Ld->getBasePtr(),
                          CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
                          Ld->getChain()};

    RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
    const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
        /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
        Log2SEW, static_cast<unsigned>(LMUL));
    MachineSDNode *Load =
        CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

    CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});

    ReplaceNode(Node, Load);
    return;
  }
  }

  // Select the default instruction.
  SelectCode(Node);
}

bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  case InlineAsm::Constraint_m:
    // We just support simple memory operands that have a single address
    // operand and need no special handling.
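    // For example, the address operand of an inline asm statement such as
    //   asm volatile("sw %1, %0" : "=m"(x) : "r"(v));
    // is passed through unchanged.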
    OutOps.push_back(Op);
    return false;
  case InlineAsm::Constraint_A:
    OutOps.push_back(Op);
    return false;
  default:
    break;
  }

  return true;
}

bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
                                             SDValue &Offset) {
  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
    Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
    Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Select a frame index and an optional immediate offset from an ADD or OR.
bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  if (!CurDAG->isBaseWithConstantOffset(Addr))
    return false;

  if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
                                         Subtarget->getXLenVT());
      Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
                                         Subtarget->getXLenVT());
      return true;
    }
  }

  return false;
}

// Fold constant addresses.
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
                               const MVT VT, const RISCVSubtarget *Subtarget,
                               SDValue Addr, SDValue &Base, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr))
    return false;

  int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();

  // If the constant is a simm12, we can fold the whole constant and use X0 as
  // the base. If the constant can be materialized with LUI+simm12, use LUI as
  // the base. We can't use generateInstSeq because it favors LUI+ADDIW.
  int64_t Lo12 = SignExtend64<12>(CVal);
  int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
  if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
    if (Hi) {
      int64_t Hi20 = (Hi >> 12) & 0xfffff;
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::LUI, DL, VT,
                                 CurDAG->getTargetConstant(Hi20, DL, VT)),
          0);
    } else {
      Base = CurDAG->getRegister(RISCV::X0, VT);
    }
    Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
    return true;
  }

  // Ask how constant materialization would handle this constant.
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());

  // If the last instruction would be an ADDI, we can fold its immediate and
  // emit the rest of the sequence as the base.
  if (Seq.back().Opc != RISCV::ADDI)
    return false;
  Lo12 = Seq.back().Imm;

  // Drop the last instruction.
  Seq.pop_back();
  assert(!Seq.empty() && "Expected more instructions in sequence");

  Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0);
  Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
  return true;
}

// Is this ADD instruction only used as the base pointer of scalar loads and
// stores?
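// If so, folding the ADD's constant into each memory access's immediate
// offset saves materializing that constant separately; if the ADD has other
// users, it gets selected anyway and folding would only add instructions.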
static bool isWorthFoldingAdd(SDValue Add) {
  for (auto Use : Add->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;
    EVT VT = cast<MemSDNode>(Use)->getMemoryVT();
    if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 &&
        VT != MVT::f64)
      return false;
    // Don't allow stores of the value. It must be used as the address.
    if (Use->getOpcode() == ISD::STORE &&
        cast<StoreSDNode>(Use)->getValue() == Add)
      return false;
    if (Use->getOpcode() == ISD::ATOMIC_STORE &&
        cast<AtomicSDNode>(Use)->getVal() == Add)
      return false;
  }

  return true;
}

bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
                                         SDValue &Offset) {
  if (SelectAddrFrameIndex(Addr, Base, Offset))
    return true;

  SDLoc DL(Addr);
  MVT VT = Addr.getSimpleValueType();

  if (Addr.getOpcode() == RISCVISD::ADD_LO) {
    Base = Addr.getOperand(0);
    Offset = Addr.getOperand(1);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    if (isInt<12>(CVal)) {
      Base = Addr.getOperand(0);
      if (Base.getOpcode() == RISCVISD::ADD_LO) {
        SDValue LoOperand = Base.getOperand(1);
        if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
          // If the Lo in (ADD_LO hi, lo) is a global variable's address
          // (its low part, really), then we can rely on the alignment of that
          // variable to provide a margin of safety before low part can
          // overflow the 12 bits of the load/store offset. Check if CVal
          // falls within that margin; if so (low part + CVal) can't overflow.
          const DataLayout &DL = CurDAG->getDataLayout();
          Align Alignment = commonAlignment(
              GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
          if (CVal == 0 || Alignment > CVal) {
            int64_t CombinedOffset = CVal + GA->getOffset();
            Base = Base.getOperand(0);
            Offset = CurDAG->getTargetGlobalAddress(
                GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
                CombinedOffset, GA->getTargetFlags());
            return true;
          }
        }
      }

      if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
        Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
      Offset = CurDAG->getTargetConstant(CVal, DL, VT);
      return true;
    }
  }

  // Handle ADD with large immediates.
  if (Addr.getOpcode() == ISD::ADD &&
      isa<ConstantSDNode>(Addr.getOperand(1))) {
    int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    assert(!isInt<12>(CVal) && "simm12 not already handled?");

    // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
    // an ADDI for part of the offset and fold the rest into the load/store.
    // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
    if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
      int64_t Adj = CVal < 0 ? -2048 : 2047;
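      // For example, CVal == 3000 becomes an ADDI of 2047 on the base plus a
      // load/store offset of 953.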
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
                                 CurDAG->getTargetConstant(Adj, DL, VT)),
          0);
      Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
      return true;
    }

    // For larger immediates, we might be able to save one instruction from
    // constant materialization by folding the Lo12 bits of the immediate into
    // the address. We should only do this if the ADD is only used by loads and
    // stores that can fold the lo12 bits. Otherwise, the ADD will get selected
    // separately with the fully materialized immediate, creating extra
    // instructions.
    if (isWorthFoldingAdd(Addr) &&
        selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
                           Offset)) {
      // Insert an ADD instruction with the materialized Hi52 bits.
      Base = SDValue(
          CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
          0);
      return true;
    }
  }

  if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset))
    return true;

  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, VT);
  return true;
}

bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                        SDValue &ShAmt) {
  // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
  // amount. If there is an AND on the shift amount, we can bypass it if it
  // doesn't affect any of those bits.
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    const APInt &AndMask = N->getConstantOperandAPInt(1);

    // Since the max shift amount is a power of 2 we can subtract 1 to make a
    // mask that covers the bits needed to represent all shift amounts.
    assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!");
    APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1);

    if (ShMask.isSubsetOf(AndMask)) {
      ShAmt = N.getOperand(0);
      return true;
    }

    // SimplifyDemandedBits may have optimized the mask so try restoring any
    // bits that are known zero.
    KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0));
    if (ShMask.isSubsetOf(AndMask | Known.Zero)) {
      ShAmt = N.getOperand(0);
      return true;
    }
  } else if (N.getOpcode() == ISD::SUB &&
             isa<ConstantSDNode>(N.getOperand(0))) {
    uint64_t Imm = N.getConstantOperandVal(0);
    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
    // to generate a NEG instead of a SUB of a constant.
    if (Imm != 0 && Imm % ShiftWidth == 0) {
      SDLoc DL(N);
      EVT VT = N.getValueType();
      SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
      unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
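      // Materialize the negation as SUB(W) x0, X; the shift consumer only
      // reads the low log2(ShiftWidth) bits, so the wrap-around is benign.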
      MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
                                                  N.getOperand(1));
      ShAmt = SDValue(Neg, 0);
      return true;
    }
  }

  ShAmt = N;
  return true;
}

bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    Val = N.getOperand(0);
    return true;
  }
  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
    Val = N;
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
      // followed by a SHXADD with c3 for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
        unsigned Trailing = countTrailingZeros(Mask);
        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
                        0);
          return true;
        }
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
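        // For example, with XLen == 64 and ShAmt == 2,
        //   (and (srl y, 4), 0x0ffffffffffffffc)
        // becomes (srli y, 6) feeding the SH2ADD.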
        if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(
              CurDAG->getMachineNode(
                  RISCV::SRLI, DL, VT, N0.getOperand(0),
                  CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
              0);
          return true;
        }
      }
    }
  }

  bool LeftShift = N.getOpcode() == ISD::SHL;
  if ((LeftShift || N.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);
    if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N0.getConstantOperandVal(1);
      if (isShiftedMask_64(Mask)) {
        unsigned C1 = N.getConstantOperandVal(1);
        unsigned XLen = Subtarget->getXLen();
        unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
        unsigned Trailing = countTrailingZeros(Mask);
        // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
        if (LeftShift && Leading == 32 && Trailing > 0 &&
            (Trailing + C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
        // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and
        // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD.
        if (!LeftShift && Leading == 32 && Trailing > C1 &&
            (Trailing - C1) == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLIW, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing, DL, VT)),
                        0);
          return true;
        }
      }
    }
  }

  return false;
}

// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
// SimplifyDemandedBits has made it so some users see a sext_inreg and some
// don't. The sext_inreg+add/sub/mul/shl will get selected, but that still
// leaves the plain add/sub/mul/shl to be selected as a non-W instruction. By
// checking the users we may be able to use a W instruction and CSE with the
// other instruction if this has happened. We could try to detect that the CSE
// opportunity exists before doing this, but that would be more complicated.
// TODO: Does this need to look through AND/OR/XOR to their users to find more
// opportunities?
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          Node->getOpcode() == RISCVISD::GREV ||
          Node->getOpcode() == RISCVISD::GORC ||
          isa<ConstantSDNode>(Node)) &&
         "Unexpected opcode");

  for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    // Users of this node should have already been instruction selected.
    if (!User->isMachineOpcode())
      return false;

    // TODO: Add more opcodes?
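    // For example, if every user is an ADDW, SW, or similar instruction that
    // only reads the low 32 bits of this value, hasAllNBitUsers(Node, 32)
    // returns true and the producer can safely be selected as a W instruction.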
    switch (User->getMachineOpcode()) {
    default:
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      if (Bits < 32)
        return false;
      break;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
        return false;
      break;
    case RISCV::SEXT_B:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
      if (Bits < 16)
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5-bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
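    // X0 as a VL operand means "use the maximum VL", so it is lowered to the
    // same sentinel immediate as the all-ones constant handled above.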
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else {
    VL = N;
  }

  return true;
}

bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
    return false;
  SplatVal = N.getOperand(1);
  return true;
}

using ValidateFn = bool (*)(int64_t);

static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
                                   SelectionDAG &DAG,
                                   const RISCVSubtarget &Subtarget,
                                   ValidateFn ValidateImm) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand type is
  // wider than the resulting vector element type, an implicit truncation
  // first takes place. Therefore, perform a manual truncation/sign-extension
  // in order to ignore any truncated bits and catch any zero-extended
  // immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  MVT XLenVT = Subtarget.getXLenVT();
  assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");
  MVT EltVT = N.getSimpleValueType().getVectorElementType();
  if (EltVT.bitsLT(XLenVT))
    SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());

  if (!ValidateImm(SplatImm))
    return false;

  SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
  return true;
}

bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
                                [](int64_t Imm) { return isInt<5>(Imm); });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}

bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatSimmHelper(
      N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      });
}

bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
  if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
      !isa<ConstantSDNode>(N.getOperand(1)))
    return false;

  int64_t SplatImm =
      cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();

  if (!isUInt<5>(SplatImm))
    return false;

  SplatVal =
      CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());

  return true;
}

bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
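// For example, (sext.w (add a0, a1)) becomes a single addw, and a sext.w
// whose input is already an ADDW/SUBW/etc. is removed outright.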
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD: Opc = RISCV::ADDW; break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB: Opc = RISCV::SUBW; break;
    case RISCV::MUL: Opc = RISCV::MULW; break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5.
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::GREVIW:
  case RISCV::GORCIW:
    // Result is already sign extended; just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  // Check that we're using V0 as a mask register.
  if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
      cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
    return false;

  // The glued user defines V0.
  const auto *Glued = N->getGluedNode();

  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
    return false;

  // Check that we're defining V0 as a mask register.
  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
    return false;

  // Check the instruction defining V0; it needs to be a VMSET pseudo.
  SDValue MaskSetter = Glued->getOperand(2);

  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
    return false;

  // Retrieve the tail policy operand index, if any.
  Optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
  const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());

  bool IsTA = true;
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    // The last operand of the pseudo is the policy op, but we might have a
    // Glue operand last. We might also have a chain.
    TailPolicyOpIdx = N->getNumOperands() - 1;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
      (*TailPolicyOpIdx)--;
    if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
      (*TailPolicyOpIdx)--;

    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
          RISCVII::TAIL_AGNOSTIC)) {
      // Keep the true-masked instruction when there is no unmasked TU
      // instruction.
      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
        return false;
      // We can't use TA if the tie-operand is not IMPLICIT_DEF.
      if (!N->getOperand(0).isUndef())
        IsTA = false;
    }
  }

  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;

  // Check that we're dropping the mask operand and any policy operand
  // when we transform to this unmasked pseudo. Additionally, if this
  // instruction is tail agnostic, the unmasked instruction should not have a
  // merge op.
  uint64_t TSFlags = TII.get(Opc).TSFlags;
  assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
         RISCVII::hasDummyMaskOp(TSFlags) &&
         !RISCVII::hasVecPolicyOp(TSFlags) &&
         "Unexpected pseudo to transform to");
  (void)TSFlags;

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if IsTA.
  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  ReplaceUses(N, Result);

  return true;
}

// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
                                       CodeGenOpt::Level OptLevel) {
  return new RISCVDAGToDAGISel(TM, OptLevel);
}