//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the RISCV target.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelDAGToDAG.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
#include "RISCVMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISCV DAG->DAG Pattern Instruction Selection"

namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
#define GET_RISCVVLXSEGTable_IMPL
#define GET_RISCVVSXSEGTable_IMPL
#define GET_RISCVVLETable_IMPL
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV

static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
  assert(Node->getNumOperands() > 0 && "Node with no operands");
  unsigned LastOpIdx = Node->getNumOperands() - 1;
  if (Node->getOperand(LastOpIdx).getValueType() == MVT::Glue)
    --LastOpIdx;
  if (Node->getOperand(LastOpIdx).getValueType() == MVT::Other)
    --LastOpIdx;
  return LastOpIdx;
}

static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
  (void)MCID;
  return getLastNonGlueOrChainOpIdx(Node);
}

void RISCVDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Result;
    switch (N->getOpcode()) {
    case ISD::SPLAT_VECTOR: {
      // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
      // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
      MVT VT = N->getSimpleValueType(0);
      unsigned Opc =
          VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
      SDLoc DL(N);
      SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
      Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
                               N->getOperand(0), VL);
      break;
    }
    case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
      // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
      // load. Done after lowering and combining so that we have a chance to
      // optimize this to VMV_V_X_VL when the upper bits aren't needed.
      assert(N->getNumOperands() == 4 && "Unexpected number of operands");
      MVT VT = N->getSimpleValueType(0);
      SDValue Passthru = N->getOperand(0);
      SDValue Lo = N->getOperand(1);
      SDValue Hi = N->getOperand(2);
      SDValue VL = N->getOperand(3);
      assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
             Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
             "Unexpected VTs!");
      MachineFunction &MF = CurDAG->getMachineFunction();
      RISCVMachineFunctionInfo *FuncInfo =
          MF.getInfo<RISCVMachineFunctionInfo>();
      SDLoc DL(N);

      // We use the same frame index we use for moving two i32s into 64-bit FPR.
      // This is an analogous operation.
      int FI = FuncInfo->getMoveF64FrameIndex(MF);
      MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
      const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
      SDValue StackSlot =
          CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));

      SDValue Chain = CurDAG->getEntryNode();
      Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));

      SDValue OffsetSlot =
          CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
      Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
                            Align(8));

      Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

      SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
      SDValue IntID =
          CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
      SDValue Ops[] = {Chain,
                       IntID,
                       Passthru,
                       StackSlot,
                       CurDAG->getRegister(RISCV::X0, MVT::i64),
                       VL};

      Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
                                           MVT::i64, MPI, Align(8),
                                           MachineMemOperand::MOLoad);
      break;
    }
    }

    if (Result) {
      LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
      LLVM_DEBUG(N->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(Result->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void RISCVDAGToDAGISel::PostprocessISelDAG() {
  HandleSDNode Dummy(CurDAG->getRoot());
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    MadeChange |= doPeepholeSExtW(N);
    MadeChange |= doPeepholeMaskedRVV(N);
  }

  CurDAG->setRoot(Dummy.getValue());

  MadeChange |= doPeepholeMergeVVMFold();

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                            RISCVMatInt::InstSeq &Seq) {
  SDNode *Result = nullptr;
  SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
  for (RISCVMatInt::Inst &Inst : Seq) {
    SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
    switch (Inst.getOpndKind()) {
    case RISCVMatInt::Imm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
      break;
    case RISCVMatInt::RegX0:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg,
                                      CurDAG->getRegister(RISCV::X0, VT));
      break;
    case RISCVMatInt::RegReg:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg);
      break;
    case RISCVMatInt::RegImm:
      Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm);
      break;
    }

    // Only the first instruction has X0 as its source.
    SrcReg = SDValue(Result, 0);
  }

  return Result;
}

static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
                         int64_t Imm, const RISCVSubtarget &Subtarget) {
  RISCVMatInt::InstSeq Seq =
      RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());

  return selectImmSeq(CurDAG, DL, VT, Seq);
}

static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
                           unsigned NF, RISCVII::VLMUL LMUL) {
  static const unsigned M1TupleRegClassIDs[] = {
      RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
      RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
      RISCV::VRN8M1RegClassID};
  static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
                                                RISCV::VRN3M2RegClassID,
                                                RISCV::VRN4M2RegClassID};

  assert(Regs.size() >= 2 && Regs.size() <= 8);

  unsigned RegClassID;
  unsigned SubReg0;
  switch (LMUL) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm1_0;
    RegClassID = M1TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_2:
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm2_0;
    RegClassID = M2TupleRegClassIDs[NF - 2];
    break;
  case RISCVII::VLMUL::LMUL_4:
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    SubReg0 = RISCV::sub_vrm4_0;
    RegClassID = RISCV::VRN2M4RegClassID;
    break;
  }

  SDLoc DL(Regs[0]);
  SmallVector<SDValue, 8> Ops;

  Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));

  for (unsigned I = 0; I < Regs.size(); ++I) {
    Ops.push_back(Regs[I]);
    Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
  }
  SDNode *N =
      CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
    SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
    bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
    bool IsLoad, MVT *IndexVT) {
  SDValue Chain = Node->getOperand(0);
  SDValue Glue;

  Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.

  if (IsStridedOrIndexed) {
    Operands.push_back(Node->getOperand(CurOp++)); // Index.
    if (IndexVT)
      *IndexVT = Operands.back()->getSimpleValueType(0);
  }

  if (IsMasked) {
    // Mask needs to be copied to V0.
    SDValue Mask = Node->getOperand(CurOp++);
    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
    Glue = Chain.getValue(1);
    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
  }
  SDValue VL;
  selectVLOp(Node->getOperand(CurOp++), VL);
  Operands.push_back(VL);

  MVT XLenVT = Subtarget->getXLenVT();
  SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
  Operands.push_back(SEWOp);

  // Masked load has the tail policy argument.
  if (IsMasked && IsLoad) {
    // Policy must be a constant.
    uint64_t Policy = Node->getConstantOperandVal(CurOp++);
    SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
    Operands.push_back(PolicyOp);
  }

  Operands.push_back(Chain); // Chain.
  if (Glue)
    Operands.push_back(Glue);
}

static bool isAllUndef(ArrayRef<SDValue> Values) {
  return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
}

void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(Merge);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands, /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain.
  MVT VT = Node->getSimpleValueType(0);
  MVT XLenVT = Subtarget->getXLenVT();
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 7> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ false, Operands,
                             /*IsLoad=*/true);

  const RISCV::VLSEGPseudo *P =
      RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                            Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
                                               XLenVT, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));     // VL
  ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumValues() - 1;
  MVT VT = Node->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);

  unsigned CurOp = 2;
  SmallVector<SDValue, 8> Operands;

  SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
                               Node->op_begin() + CurOp + NF);
  bool IsTU = IsMasked || !isAllUndef(Regs);
  if (IsTU) {
    SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
    Operands.push_back(MaskedOff);
  }
  CurOp += NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/true, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
      NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Load =
      CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned I = 0; I < NF; ++I) {
    unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I);
    ReplaceUses(SDValue(Node, I),
                CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
  }

  ReplaceUses(SDValue(Node, NF), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(Node);
}

void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked,
                                    bool IsStrided) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 4;
  if (IsStrided)
    NF--;
  if (IsMasked)
    NF--;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                             Operands);

  const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo(
      NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
                                     bool IsOrdered) {
  SDLoc DL(Node);
  unsigned NF = Node->getNumOperands() - 5;
  if (IsMasked)
    --NF;
  MVT VT = Node->getOperand(2)->getSimpleValueType(0);
  unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
  SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF);
  SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL);

  SmallVector<SDValue, 8> Operands;
  Operands.push_back(StoreVal);
  unsigned CurOp = 2 + NF;

  MVT IndexVT;
  addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                             /*IsStridedOrIndexed*/ true, Operands,
                             /*IsLoad=*/false, &IndexVT);

  assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
         "Element count mismatch");

  RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
  unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
  if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
    report_fatal_error("The V extension does not support EEW=64 for index "
                       "values when XLEN=32");
  }
  const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo(
      NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
      static_cast<unsigned>(IndexLMUL));
  MachineSDNode *Store =
      CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands);

  if (auto *MemOp = dyn_cast<MemSDNode>(Node))
    CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()});

  ReplaceNode(Node, Store);
}

void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
  if (!Subtarget->hasVInstructions())
    return;

  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
         "Unexpected opcode");

  SDLoc DL(Node);
  MVT XLenVT = Subtarget->getXLenVT();

  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNoOffset = HasChain ? 1 : 0;
  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);

  assert((IntNo == Intrinsic::riscv_vsetvli ||
          IntNo == Intrinsic::riscv_vsetvlimax ||
          IntNo == Intrinsic::riscv_vsetvli_opt ||
          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
         "Unexpected vsetvli intrinsic");

  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
               IntNo == Intrinsic::riscv_vsetvlimax_opt;
  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);

  assert(Node->getNumOperands() == Offset + 2 &&
         "Unexpected number of operands");

  unsigned SEW =
      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
      Node->getConstantOperandVal(Offset + 1) & 0x7);

  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
                                            /*MaskAgnostic*/ false);
  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);

  SmallVector<EVT, 2> VTs = {XLenVT};
  if (HasChain)
    VTs.push_back(MVT::Other);

  SDValue VLOperand;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (VLMax) {
    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
    Opcode = RISCV::PseudoVSETVLIX0;
  } else {
    VLOperand = Node->getOperand(IntNoOffset + 1);

    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
      uint64_t AVL = C->getZExtValue();
      if (isUInt<5>(AVL)) {
        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
        if (HasChain)
          Ops.push_back(Node->getOperand(0));
        ReplaceNode(
            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
        return;
      }
    }
  }

  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
  if (HasChain)
    Ops.push_back(Node->getOperand(0));

  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
}

bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
  MVT VT = Node->getSimpleValueType(0);
  unsigned Opcode = Node->getOpcode();
  assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) &&
         "Unexpected opcode");
  SDLoc DL(Node);

  // For operations of the form (x << C1) op C2, check if we can use
  // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1.
  SDValue N0 = Node->getOperand(0);
  SDValue N1 = Node->getOperand(1);

  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
  if (!Cst)
    return false;

  int64_t Val = Cst->getSExtValue();

  // Check if immediate can already use ANDI/ORI/XORI.
  if (isInt<12>(Val))
    return false;

  SDValue Shift = N0;

  // If Val is simm32 and we have a sext_inreg from i32, then the binop
  // produces at least 33 sign bits. We can peek through the sext_inreg and use
  // a SLLIW at the end.
  bool SignExt = false;
  if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) {
    SignExt = true;
    Shift = N0.getOperand(0);
  }

  if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse())
    return false;

  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
  if (!ShlCst)
    return false;

  uint64_t ShAmt = ShlCst->getZExtValue();

  // Make sure that we don't change the operation by removing bits.
  // This only matters for OR and XOR, AND is unaffected.
  uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt);
  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
    return false;

  int64_t ShiftedVal = Val >> ShAmt;
  if (!isInt<12>(ShiftedVal))
    return false;

  // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW.
  if (SignExt && ShAmt >= 32)
    return false;

  // Ok, we can reorder to get a smaller immediate.
  unsigned BinOpc;
  switch (Opcode) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::AND: BinOpc = RISCV::ANDI; break;
  case ISD::OR:  BinOpc = RISCV::ORI;  break;
  case ISD::XOR: BinOpc = RISCV::XORI; break;
  }

  unsigned ShOpc = SignExt ? RISCV::SLLIW : RISCV::SLLI;

  SDNode *BinOp =
      CurDAG->getMachineNode(BinOpc, DL, VT, Shift.getOperand(0),
                             CurDAG->getTargetConstant(ShiftedVal, DL, VT));
  SDNode *SLLI =
      CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0),
                             CurDAG->getTargetConstant(ShAmt, DL, VT));
  ReplaceNode(Node, SLLI);
  return true;
}

void RISCVDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we have already selected.
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // Instruction Selection not handled by the auto-generated tablegen selection
  // should be handled here.
  unsigned Opcode = Node->getOpcode();
  MVT XLenVT = Subtarget->getXLenVT();
  SDLoc DL(Node);
  MVT VT = Node->getSimpleValueType(0);

  switch (Opcode) {
  case ISD::Constant: {
    auto *ConstNode = cast<ConstantSDNode>(Node);
    if (VT == XLenVT && ConstNode->isZero()) {
      SDValue New =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
      ReplaceNode(Node, New.getNode());
      return;
    }
    int64_t Imm = ConstNode->getSExtValue();
    // If the upper XLen-16 bits are not used, try to convert this to a simm12
    // by sign extending bit 15.
    if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
        hasAllHUsers(Node))
      Imm = SignExtend64<16>(Imm);
    // If the upper 32-bits are not used try to convert this into a simm32 by
    // sign extending bit 32.
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64<32>(Imm);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
    return;
  }
  case ISD::SHL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = countTrailingZeros(Mask);
      if (TrailingZeros > 0 && LeadingZeros == 32) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }
    break;
  }
  case ISD::SRL: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);

    // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
    // 32 leading zeros and C3 trailing zeros.
    if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
      unsigned XLen = Subtarget->getXLen();
      unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
      unsigned TrailingZeros = countTrailingZeros(Mask);
      if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
        SDNode *SRLIW = CurDAG->getMachineNode(
            RISCV::SRLIW, DL, VT, N0->getOperand(0),
            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
        SDNode *SLLI = CurDAG->getMachineNode(
            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
            CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
        ReplaceNode(Node, SLLI);
        return;
      }
    }

    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = countTrailingOnes(Mask);
    if (ShAmt >= TrailingOnes)
      break;
    // If the mask has 32 trailing ones, use SRLIW.
    if (TrailingOnes == 32) {
      SDNode *SRLIW =
          CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, N0->getOperand(0),
                                 CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, SRLIW);
      return;
    }

    // Only do the remaining transforms if the shift has one use.
    if (!N0.hasOneUse())
      break;

    // If C2 is (1 << ShAmt) use bexti if possible.
    if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) {
      SDNode *BEXTI =
          CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0),
                                 CurDAG->getTargetConstant(ShAmt, DL, VT));
      ReplaceNode(Node, BEXTI);
      return;
    }
    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //          (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
    if (ExtSize >= 32 || ShAmt >= ExtSize)
      break;
    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRAI = CurDAG->getMachineNode(
        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRAI);
    return;
  }
  case ISD::OR:
  case ISD::XOR:
    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  case ISD::AND: {
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;

    SDValue N0 = Node->getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if (LeftShift || N0.getOpcode() == ISD::SRL) {
      auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C)
        break;
      unsigned C2 = C->getZExtValue();
      unsigned XLen = Subtarget->getXLen();
      assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");

      uint64_t C1 = N1C->getZExtValue();

      // Keep track of whether this is a c.andi. If we can't use c.andi, the
      // shift pair might offer more compression opportunities.
      // TODO: We could check for C extension here, but we don't have many lit
      // tests with the C extension enabled so not checking gets better
      // coverage.
      // TODO: What if ANDI is faster than the shift?
      bool IsCANDI = isInt<6>(N1C->getSExtValue());

      // Clear irrelevant bits in the mask.
      if (LeftShift)
        C1 &= maskTrailingZeros<uint64_t>(C2);
      else
        C1 &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Some transforms should only be done if the shift has a single use or
      // the AND would become (srli (slli X, 32), 32)
      bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);

      SDValue X = N0.getOperand(0);

      // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
      // with c3 leading zeros.
      if (!LeftShift && isMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        if (C2 < Leading) {
          // If the number of leading zeros is C2+32 this can be SRLIW.
          if (C2 + 32 == Leading) {
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32)
          // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
          //
          // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
          // legalized and goes through DAG combine.
          if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() &&
              X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
              cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
            SDNode *SRAIW =
                CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
                                       CurDAG->getTargetConstant(31, DL, VT));
            SDNode *SRLIW = CurDAG->getMachineNode(
                RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
                CurDAG->getTargetConstant(Leading - 32, DL, VT));
            ReplaceNode(Node, SRLIW);
            return;
          }

          // (srli (slli x, c3-c2), c3).
          // Skip if we could use (zext.w (sraiw X, C2)).
          bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
                      X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                      cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
          // Also Skip if we can use bexti.
          Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1;
          if (OneUseOrZExtW && !Skip) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(Leading - C2, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask
      // shifted by c2 bits with c3 leading zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);

        if (C2 + Leading < XLen &&
            C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) {
          // Use slli.uw when possible.
          if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) {
            SDNode *SLLI_UW =
                CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X,
                                       CurDAG->getTargetConstant(C2, DL, VT));
            ReplaceNode(Node, SLLI_UW);
            return;
          }

          // (srli (slli c2+c3), c3)
          if (OneUseOrZExtW && !IsCANDI) {
            SDNode *SLLI = CurDAG->getMachineNode(
                RISCV::SLLI, DL, VT, X,
                CurDAG->getTargetConstant(C2 + Leading, DL, VT));
            SDNode *SRLI = CurDAG->getMachineNode(
                RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
                CurDAG->getTargetConstant(Leading, DL, VT));
            ReplaceNode(Node, SRLI);
            return;
          }
        }
      }

      // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
      // shifted mask with c2 leading zeros and c3 trailing zeros.
      if (!LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = countTrailingZeros(C1);
        if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
            !IsCANDI) {
          unsigned SrliOpc = RISCV::SRLI;
          // If the input is zexti32 we should use SRLIW.
          if (X.getOpcode() == ISD::AND &&
              isa<ConstantSDNode>(X.getOperand(1)) &&
              X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
            SrliOpc = RISCV::SRLIW;
            X = X.getOperand(0);
          }
          SDNode *SRLI = CurDAG->getMachineNode(
              SrliOpc, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
        if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 &&
            OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(C2 + Trailing, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }

      // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
      // shifted mask with no leading zeros and c3 trailing zeros.
      if (LeftShift && isShiftedMask_64(C1)) {
        unsigned Leading = XLen - llvm::bit_width(C1);
        unsigned Trailing = countTrailingZeros(C1);
        if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLI = CurDAG->getMachineNode(
              RISCV::SRLI, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
        // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
        if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
          SDNode *SRLIW = CurDAG->getMachineNode(
              RISCV::SRLIW, DL, VT, X,
              CurDAG->getTargetConstant(Trailing - C2, DL, VT));
          SDNode *SLLI = CurDAG->getMachineNode(
              RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
              CurDAG->getTargetConstant(Trailing, DL, VT));
          ReplaceNode(Node, SLLI);
          return;
        }
      }
    }

    if (tryShrinkShlLogicImm(Node))
      return;

    break;
  }
  case ISD::MUL: {
    // Special case for calculating (mul (and X, C2), C1) where the full product
    // fits in XLen bits. We can shift X left by the number of leading zeros in
    // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final
    // product has XLen trailing zeros, putting it in the output of MULHU. This
    // can avoid materializing a constant in a register for C2.

    // RHS should be a constant.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C || !N1C->hasOneUse())
      break;

    // LHS should be an AND with constant.
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1)))
      break;

    uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();

    // Constant should be a mask.
    if (!isMask_64(C2))
      break;

    // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT
    // has multiple users or the constant is a simm12. This prevents inserting
    // a shift and still have uses of the AND/ZEXT. Shifting a simm12 will
    // likely make it more costly to materialize. Otherwise, using a SLLI
    // might allow it to be compressed.
    bool IsANDIOrZExt =
        isInt<12>(C2) ||
        (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()) ||
        (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba());
    if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
      break;

    // We need to shift left the AND input and C1 by a total of XLen bits.

    // How far left do we need to shift the AND input?
    unsigned XLen = Subtarget->getXLen();
    unsigned LeadingZeros = XLen - llvm::bit_width(C2);

    // The constant gets shifted by the remaining amount unless that would
    // shift bits out.
    uint64_t C1 = N1C->getZExtValue();
    unsigned ConstantShift = XLen - LeadingZeros;
    if (ConstantShift > (XLen - llvm::bit_width(C1)))
      break;

    uint64_t ShiftedC1 = C1 << ConstantShift;
    // If this is RV32, we need to sign extend the constant.
    if (XLen == 32)
      ShiftedC1 = SignExtend64<32>(ShiftedC1);

    // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
    SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
                               CurDAG->getTargetConstant(LeadingZeros, DL, VT));
    SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT,
                                           SDValue(SLLI, 0), SDValue(Imm, 0));
    ReplaceNode(Node, MULHU);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vmsgeu:
    case Intrinsic::riscv_vmsge: {
      SDValue Src1 = Node->getOperand(1);
      SDValue Src2 = Node->getOperand(2);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b)            \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix;                            \
    VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b;                             \
    break;
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32)
      CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_VMNAND_VMSET_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(3), VL);

      // If vmsgeu with 0 immediate, expand it to vmset.
      if (IsCmpUnsignedZero) {
        ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW));
        return;
      }

      // Expand to
      // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
          0);
      ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT,
                                               {Cmp, Cmp, VL, SEW}));
      return;
    }
    case Intrinsic::riscv_vmsgeu_mask:
    case Intrinsic::riscv_vmsge_mask: {
      SDValue Src1 = Node->getOperand(2);
      SDValue Src2 = Node->getOperand(3);
      bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
      bool IsCmpUnsignedZero = false;
      // Only custom select scalar second operand.
      if (Src2.getValueType() != XLenVT)
        break;
      // Small constants are handled with patterns.
      if (auto *C = dyn_cast<ConstantSDNode>(Src2)) {
        int64_t CVal = C->getSExtValue();
        if (CVal >= -15 && CVal <= 16) {
          if (!IsUnsigned || CVal != 0)
            break;
          IsCmpUnsignedZero = true;
        }
      }
      MVT Src1VT = Src1.getSimpleValueType();
      unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
          VMOROpcode;
      switch (RISCVTargetLowering::getLMUL(Src1VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b)                         \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix                 \
                             : RISCV::PseudoVMSLT_VX_##suffix;                 \
    VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK      \
                                 : RISCV::PseudoVMSLT_VX_##suffix##_MASK;      \
    break;
      CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
      CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
      CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
      CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
      CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
      CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
      CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
#undef CASE_VMSLT_OPCODES
      }
      // Mask operations use the LMUL from the mask type.
      switch (RISCVTargetLowering::getLMUL(VT)) {
      default:
        llvm_unreachable("Unexpected LMUL!");
#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix)                       \
  case RISCVII::VLMUL::lmulenum:                                               \
    VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix;                              \
    VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix;                            \
    VMOROpcode = RISCV::PseudoVMOR_MM_##suffix;                                \
    break;
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
      CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
      }
      SDValue SEW = CurDAG->getTargetConstant(
          Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
      SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT);
      SDValue VL;
      selectVLOp(Node->getOperand(5), VL);
      SDValue MaskedOff = Node->getOperand(1);
      SDValue Mask = Node->getOperand(4);

      // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
      if (IsCmpUnsignedZero) {
        // We don't need vmor if the MaskedOff and the Mask are the same
        // value.
        if (Mask == MaskedOff) {
          ReplaceUses(Node, Mask.getNode());
          return;
        }
        ReplaceNode(Node,
                    CurDAG->getMachineNode(VMOROpcode, DL, VT,
                                           {Mask, MaskedOff, VL, MaskSEW}));
        return;
      }

      // If the MaskedOff value and the Mask are the same value use
      // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
      // This avoids needing to copy v0 to vd before starting the next sequence.
      if (Mask == MaskedOff) {
        SDValue Cmp = SDValue(
            CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
            0);
        ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
                                                 {Mask, Cmp, VL, MaskSEW}));
        return;
      }

      // Mask needs to be copied to V0.
      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                           RISCV::V0, Mask, SDValue());
      SDValue Glue = Chain.getValue(1);
      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);

      // Otherwise use
      // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
      // The result is mask undisturbed.
      // We use the same instructions to emulate mask agnostic behavior, because
      // the agnostic result can be either undisturbed or all 1.
      SDValue Cmp = SDValue(
          CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
          0);
      // vmxor.mm vd, vd, v0 is used to update active value.
      ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
                                               {Cmp, Mask, VL, MaskSEW}));
      return;
    }
    case Intrinsic::riscv_vsetvli_opt:
    case Intrinsic::riscv_vsetvlimax_opt:
      return selectVSETVLI(Node);
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    // By default we do not custom select any intrinsic.
    default:
      break;
    case Intrinsic::riscv_vsetvli:
    case Intrinsic::riscv_vsetvlimax:
      return selectVSETVLI(Node);
    case Intrinsic::riscv_vlseg2:
    case Intrinsic::riscv_vlseg3:
    case Intrinsic::riscv_vlseg4:
    case Intrinsic::riscv_vlseg5:
    case Intrinsic::riscv_vlseg6:
    case Intrinsic::riscv_vlseg7:
    case Intrinsic::riscv_vlseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg2_mask:
    case Intrinsic::riscv_vlseg3_mask:
    case Intrinsic::riscv_vlseg4_mask:
    case Intrinsic::riscv_vlseg5_mask:
    case Intrinsic::riscv_vlseg6_mask:
    case Intrinsic::riscv_vlseg7_mask:
    case Intrinsic::riscv_vlseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vlsseg2:
    case Intrinsic::riscv_vlsseg3:
    case Intrinsic::riscv_vlsseg4:
    case Intrinsic::riscv_vlsseg5:
    case Intrinsic::riscv_vlsseg6:
    case Intrinsic::riscv_vlsseg7:
    case Intrinsic::riscv_vlsseg8: {
      selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vlsseg2_mask:
    case Intrinsic::riscv_vlsseg3_mask:
    case Intrinsic::riscv_vlsseg4_mask:
    case Intrinsic::riscv_vlsseg5_mask:
    case Intrinsic::riscv_vlsseg6_mask:
    case Intrinsic::riscv_vlsseg7_mask:
    case Intrinsic::riscv_vlsseg8_mask: {
      selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vloxseg2:
    case Intrinsic::riscv_vloxseg3:
    case Intrinsic::riscv_vloxseg4:
    case Intrinsic::riscv_vloxseg5:
    case Intrinsic::riscv_vloxseg6:
    case Intrinsic::riscv_vloxseg7:
    case Intrinsic::riscv_vloxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2:
    case Intrinsic::riscv_vluxseg3:
    case Intrinsic::riscv_vluxseg4:
    case Intrinsic::riscv_vluxseg5:
    case Intrinsic::riscv_vluxseg6:
    case Intrinsic::riscv_vluxseg7:
    case Intrinsic::riscv_vluxseg8:
      selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vloxseg2_mask:
    case Intrinsic::riscv_vloxseg3_mask:
    case Intrinsic::riscv_vloxseg4_mask:
    case Intrinsic::riscv_vloxseg5_mask:
    case Intrinsic::riscv_vloxseg6_mask:
    case Intrinsic::riscv_vloxseg7_mask:
    case Intrinsic::riscv_vloxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true);
      return;
    case Intrinsic::riscv_vluxseg2_mask:
    case Intrinsic::riscv_vluxseg3_mask:
    case Intrinsic::riscv_vluxseg4_mask:
    case Intrinsic::riscv_vluxseg5_mask:
    case Intrinsic::riscv_vluxseg6_mask:
    case Intrinsic::riscv_vluxseg7_mask:
    case Intrinsic::riscv_vluxseg8_mask:
      selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false);
      return;
    case Intrinsic::riscv_vlseg8ff:
    case Intrinsic::riscv_vlseg7ff:
    case Intrinsic::riscv_vlseg6ff:
    case Intrinsic::riscv_vlseg5ff:
    case Intrinsic::riscv_vlseg4ff:
    case Intrinsic::riscv_vlseg3ff:
    case Intrinsic::riscv_vlseg2ff: {
      selectVLSEGFF(Node, /*IsMasked*/ false);
      return;
    }
    case Intrinsic::riscv_vlseg8ff_mask:
    case Intrinsic::riscv_vlseg7ff_mask:
    case Intrinsic::riscv_vlseg6ff_mask:
    case Intrinsic::riscv_vlseg5ff_mask:
    case Intrinsic::riscv_vlseg4ff_mask:
    case Intrinsic::riscv_vlseg3ff_mask:
    case Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
      bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsics only have TU version pseudo instructions.
      bool IsTU = HasPassthruOperand &&
                  (IsMasked || !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else if (HasPassthruOperand)
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
      bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
      SmallVector<SDValue, 7> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
    case Intrinsic::riscv_vssseg7:
    case Intrinsic::riscv_vssseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true);
      return;
    }
    case Intrinsic::riscv_vssseg2_mask:
    case Intrinsic::riscv_vssseg3_mask:
    case Intrinsic::riscv_vssseg4_mask:
    case Intrinsic::riscv_vssseg5_mask:
    case Intrinsic::riscv_vssseg6_mask:
    case Intrinsic::riscv_vssseg7_mask:
Intrinsic::riscv_vssseg7_mask: 1563 case Intrinsic::riscv_vssseg8_mask: { 1564 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1565 return; 1566 } 1567 case Intrinsic::riscv_vsoxseg2: 1568 case Intrinsic::riscv_vsoxseg3: 1569 case Intrinsic::riscv_vsoxseg4: 1570 case Intrinsic::riscv_vsoxseg5: 1571 case Intrinsic::riscv_vsoxseg6: 1572 case Intrinsic::riscv_vsoxseg7: 1573 case Intrinsic::riscv_vsoxseg8: 1574 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1575 return; 1576 case Intrinsic::riscv_vsuxseg2: 1577 case Intrinsic::riscv_vsuxseg3: 1578 case Intrinsic::riscv_vsuxseg4: 1579 case Intrinsic::riscv_vsuxseg5: 1580 case Intrinsic::riscv_vsuxseg6: 1581 case Intrinsic::riscv_vsuxseg7: 1582 case Intrinsic::riscv_vsuxseg8: 1583 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1584 return; 1585 case Intrinsic::riscv_vsoxseg2_mask: 1586 case Intrinsic::riscv_vsoxseg3_mask: 1587 case Intrinsic::riscv_vsoxseg4_mask: 1588 case Intrinsic::riscv_vsoxseg5_mask: 1589 case Intrinsic::riscv_vsoxseg6_mask: 1590 case Intrinsic::riscv_vsoxseg7_mask: 1591 case Intrinsic::riscv_vsoxseg8_mask: 1592 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1593 return; 1594 case Intrinsic::riscv_vsuxseg2_mask: 1595 case Intrinsic::riscv_vsuxseg3_mask: 1596 case Intrinsic::riscv_vsuxseg4_mask: 1597 case Intrinsic::riscv_vsuxseg5_mask: 1598 case Intrinsic::riscv_vsuxseg6_mask: 1599 case Intrinsic::riscv_vsuxseg7_mask: 1600 case Intrinsic::riscv_vsuxseg8_mask: 1601 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1602 return; 1603 case Intrinsic::riscv_vsoxei: 1604 case Intrinsic::riscv_vsoxei_mask: 1605 case Intrinsic::riscv_vsuxei: 1606 case Intrinsic::riscv_vsuxei_mask: { 1607 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1608 IntNo == Intrinsic::riscv_vsuxei_mask; 1609 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1610 IntNo == Intrinsic::riscv_vsoxei_mask; 1611 1612 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1613 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1614 1615 unsigned CurOp = 2; 1616 SmallVector<SDValue, 8> Operands; 1617 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
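      // addVectorLoadStoreOperands appends the remaining operands and reports
      // the index operand's type through IndexVT so its EEW and LMUL can be
      // checked below.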
1618 1619 MVT IndexVT; 1620 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1621 /*IsStridedOrIndexed*/ true, Operands, 1622 /*IsLoad=*/false, &IndexVT); 1623 1624 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1625 "Element count mismatch"); 1626 1627 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1628 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1629 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1630 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1631 report_fatal_error("The V extension does not support EEW=64 for index " 1632 "values when XLEN=32"); 1633 } 1634 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1635 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1636 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1637 MachineSDNode *Store = 1638 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1639 1640 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1641 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1642 1643 ReplaceNode(Node, Store); 1644 return; 1645 } 1646 case Intrinsic::riscv_vsm: 1647 case Intrinsic::riscv_vse: 1648 case Intrinsic::riscv_vse_mask: 1649 case Intrinsic::riscv_vsse: 1650 case Intrinsic::riscv_vsse_mask: { 1651 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1652 IntNo == Intrinsic::riscv_vsse_mask; 1653 bool IsStrided = 1654 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1655 1656 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1657 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1658 1659 unsigned CurOp = 2; 1660 SmallVector<SDValue, 8> Operands; 1661 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1662 1663 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1664 Operands); 1665 1666 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1667 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1668 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1669 MachineSDNode *Store = 1670 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1671 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1672 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1673 1674 ReplaceNode(Node, Store); 1675 return; 1676 } 1677 } 1678 break; 1679 } 1680 case ISD::BITCAST: { 1681 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1682 // Just drop bitcasts between vectors if both are fixed or both are 1683 // scalable. 1684 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1685 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1686 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1687 CurDAG->RemoveDeadNode(Node); 1688 return; 1689 } 1690 break; 1691 } 1692 case ISD::INSERT_SUBVECTOR: { 1693 SDValue V = Node->getOperand(0); 1694 SDValue SubV = Node->getOperand(1); 1695 SDLoc DL(SubV); 1696 auto Idx = Node->getConstantOperandVal(2); 1697 MVT SubVecVT = SubV.getSimpleValueType(); 1698 1699 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1700 MVT SubVecContainerVT = SubVecVT; 1701 // Establish the correct scalable-vector types for any fixed-length type. 
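    // (For example, with a 128-bit minimum VLEN a fixed v4i32 typically maps
    // to the scalable container nxv2i32; the exact container depends on the
    // subtarget.)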
1702 if (SubVecVT.isFixedLengthVector()) 1703 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1704 if (VT.isFixedLengthVector()) 1705 VT = TLI.getContainerForFixedLengthVector(VT); 1706 1707 const auto *TRI = Subtarget->getRegisterInfo(); 1708 unsigned SubRegIdx; 1709 std::tie(SubRegIdx, Idx) = 1710 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1711 VT, SubVecContainerVT, Idx, TRI); 1712 1713 // If the Idx hasn't been completely eliminated then this is a subvector 1714 // insert which doesn't naturally align to a vector register. These must 1715 // be handled using instructions to manipulate the vector registers. 1716 if (Idx != 0) 1717 break; 1718 1719 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1720 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1721 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1722 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1723 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1724 assert((!IsSubVecPartReg || V.isUndef()) && 1725 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1726 "the subvector is smaller than a full-sized register"); 1727 1728 // If we haven't set a SubRegIdx, then we must be going between 1729 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1730 if (SubRegIdx == RISCV::NoSubRegister) { 1731 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1732 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1733 InRegClassID && 1734 "Unexpected subvector extraction"); 1735 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1736 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1737 DL, VT, SubV, RC); 1738 ReplaceNode(Node, NewNode); 1739 return; 1740 } 1741 1742 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1743 ReplaceNode(Node, Insert.getNode()); 1744 return; 1745 } 1746 case ISD::EXTRACT_SUBVECTOR: { 1747 SDValue V = Node->getOperand(0); 1748 auto Idx = Node->getConstantOperandVal(1); 1749 MVT InVT = V.getSimpleValueType(); 1750 SDLoc DL(V); 1751 1752 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1753 MVT SubVecContainerVT = VT; 1754 // Establish the correct scalable-vector types for any fixed-length type. 1755 if (VT.isFixedLengthVector()) 1756 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1757 if (InVT.isFixedLengthVector()) 1758 InVT = TLI.getContainerForFixedLengthVector(InVT); 1759 1760 const auto *TRI = Subtarget->getRegisterInfo(); 1761 unsigned SubRegIdx; 1762 std::tie(SubRegIdx, Idx) = 1763 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1764 InVT, SubVecContainerVT, Idx, TRI); 1765 1766 // If the Idx hasn't been completely eliminated then this is a subvector 1767 // extract which doesn't naturally align to a vector register. These must 1768 // be handled using instructions to manipulate the vector registers. 1769 if (Idx != 0) 1770 break; 1771 1772 // If we haven't set a SubRegIdx, then we must be going between 1773 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
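    // For instance, extracting a fixed-length v4i32 (container nxv2i32 under a
    // 128-bit VLEN assumption) at index 0 from an nxv2i32 source stays within
    // a single vector register, so the COPY_TO_REGCLASS below is sufficient.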
1774 if (SubRegIdx == RISCV::NoSubRegister) { 1775 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1776 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1777 InRegClassID && 1778 "Unexpected subvector extraction"); 1779 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1780 SDNode *NewNode = 1781 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1782 ReplaceNode(Node, NewNode); 1783 return; 1784 } 1785 1786 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1787 ReplaceNode(Node, Extract.getNode()); 1788 return; 1789 } 1790 case RISCVISD::VMV_S_X_VL: 1791 case RISCVISD::VFMV_S_F_VL: 1792 case RISCVISD::VMV_V_X_VL: 1793 case RISCVISD::VFMV_V_F_VL: { 1794 // Only if we have optimized zero-stride vector load. 1795 if (!Subtarget->hasOptimizedZeroStrideLoad()) 1796 break; 1797 1798 // Try to match splat of a scalar load to a strided load with stride of x0. 1799 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1800 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1801 if (!Node->getOperand(0).isUndef()) 1802 break; 1803 SDValue Src = Node->getOperand(1); 1804 auto *Ld = dyn_cast<LoadSDNode>(Src); 1805 if (!Ld) 1806 break; 1807 EVT MemVT = Ld->getMemoryVT(); 1808 // The memory VT should be the same size as the element type. 1809 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1810 break; 1811 if (!IsProfitableToFold(Src, Node, Node) || 1812 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1813 break; 1814 1815 SDValue VL; 1816 if (IsScalarMove) { 1817 // We could deal with more VL if we update the VSETVLI insert pass to 1818 // avoid introducing more VSETVLI. 1819 if (!isOneConstant(Node->getOperand(2))) 1820 break; 1821 selectVLOp(Node->getOperand(2), VL); 1822 } else 1823 selectVLOp(Node->getOperand(2), VL); 1824 1825 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1826 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1827 1828 SDValue Operands[] = {Ld->getBasePtr(), 1829 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1830 Ld->getChain()}; 1831 1832 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1833 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1834 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1835 Log2SEW, static_cast<unsigned>(LMUL)); 1836 MachineSDNode *Load = 1837 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); 1838 // Update the chain. 1839 ReplaceUses(Src.getValue(1), SDValue(Load, 1)); 1840 // Record the mem-refs 1841 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 1842 // Replace the splat with the vlse. 1843 ReplaceNode(Node, Load); 1844 return; 1845 } 1846 } 1847 1848 // Select the default instruction. 1849 SelectCode(Node); 1850 } 1851 1852 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1853 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1854 switch (ConstraintID) { 1855 case InlineAsm::Constraint_m: 1856 // We just support simple memory operands that have a single address 1857 // operand and need no special handling. 
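    // The same applies to the 'A' constraint below (an address held in a
    // general-purpose register, as used by atomic sequences).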
1858 OutOps.push_back(Op); 1859 return false; 1860 case InlineAsm::Constraint_A: 1861 OutOps.push_back(Op); 1862 return false; 1863 default: 1864 break; 1865 } 1866 1867 return true; 1868 } 1869 1870 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 1871 SDValue &Offset) { 1872 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1873 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1874 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 1875 return true; 1876 } 1877 1878 return false; 1879 } 1880 1881 // Select a frame index and an optional immediate offset from an ADD or OR. 1882 bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, 1883 SDValue &Offset) { 1884 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1885 return true; 1886 1887 if (!CurDAG->isBaseWithConstantOffset(Addr)) 1888 return false; 1889 1890 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { 1891 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1892 if (isInt<12>(CVal)) { 1893 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), 1894 Subtarget->getXLenVT()); 1895 Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), 1896 Subtarget->getXLenVT()); 1897 return true; 1898 } 1899 } 1900 1901 return false; 1902 } 1903 1904 // Fold constant addresses. 1905 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 1906 const MVT VT, const RISCVSubtarget *Subtarget, 1907 SDValue Addr, SDValue &Base, SDValue &Offset) { 1908 if (!isa<ConstantSDNode>(Addr)) 1909 return false; 1910 1911 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 1912 1913 // If the constant is a simm12, we can fold the whole constant and use X0 as 1914 // the base. If the constant can be materialized with LUI+simm12, use LUI as 1915 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 1916 int64_t Lo12 = SignExtend64<12>(CVal); 1917 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 1918 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 1919 if (Hi) { 1920 int64_t Hi20 = (Hi >> 12) & 0xfffff; 1921 Base = SDValue( 1922 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 1923 CurDAG->getTargetConstant(Hi20, DL, VT)), 1924 0); 1925 } else { 1926 Base = CurDAG->getRegister(RISCV::X0, VT); 1927 } 1928 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 1929 return true; 1930 } 1931 1932 // Ask how constant materialization would handle this constant. 1933 RISCVMatInt::InstSeq Seq = 1934 RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits()); 1935 1936 // If the last instruction would be an ADDI, we can fold its immediate and 1937 // emit the rest of the sequence as the base. 1938 if (Seq.back().getOpcode() != RISCV::ADDI) 1939 return false; 1940 Lo12 = Seq.back().getImm(); 1941 1942 // Drop the last instruction. 1943 Seq.pop_back(); 1944 assert(!Seq.empty() && "Expected more instructions in sequence"); 1945 1946 Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0); 1947 Offset = CurDAG->getTargetConstant(Lo12, DL, VT); 1948 return true; 1949 } 1950 1951 // Is this ADD instruction only used as the base pointer of scalar loads and 1952 // stores? 
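// Folding the low 12 bits of a large offset into the memory instructions only
// pays off if every user can absorb that offset; otherwise the ADD (and the
// fully materialized constant) is still needed and we would merely add
// instructions.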
1953 static bool isWorthFoldingAdd(SDValue Add) { 1954 for (auto *Use : Add->uses()) { 1955 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 1956 Use->getOpcode() != ISD::ATOMIC_LOAD && 1957 Use->getOpcode() != ISD::ATOMIC_STORE) 1958 return false; 1959 EVT VT = cast<MemSDNode>(Use)->getMemoryVT(); 1960 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 1961 VT != MVT::f64) 1962 return false; 1963 // Don't allow stores of the value. It must be used as the address. 1964 if (Use->getOpcode() == ISD::STORE && 1965 cast<StoreSDNode>(Use)->getValue() == Add) 1966 return false; 1967 if (Use->getOpcode() == ISD::ATOMIC_STORE && 1968 cast<AtomicSDNode>(Use)->getVal() == Add) 1969 return false; 1970 } 1971 1972 return true; 1973 } 1974 1975 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 1976 SDValue &Offset) { 1977 if (SelectAddrFrameIndex(Addr, Base, Offset)) 1978 return true; 1979 1980 SDLoc DL(Addr); 1981 MVT VT = Addr.getSimpleValueType(); 1982 1983 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 1984 Base = Addr.getOperand(0); 1985 Offset = Addr.getOperand(1); 1986 return true; 1987 } 1988 1989 if (CurDAG->isBaseWithConstantOffset(Addr)) { 1990 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 1991 if (isInt<12>(CVal)) { 1992 Base = Addr.getOperand(0); 1993 if (Base.getOpcode() == RISCVISD::ADD_LO) { 1994 SDValue LoOperand = Base.getOperand(1); 1995 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 1996 // If the Lo in (ADD_LO hi, lo) is a global variable's address 1997 // (its low part, really), then we can rely on the alignment of that 1998 // variable to provide a margin of safety before low part can overflow 1999 // the 12 bits of the load/store offset. Check if CVal falls within 2000 // that margin; if so (low part + CVal) can't overflow. 2001 const DataLayout &DL = CurDAG->getDataLayout(); 2002 Align Alignment = commonAlignment( 2003 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2004 if (CVal == 0 || Alignment > CVal) { 2005 int64_t CombinedOffset = CVal + GA->getOffset(); 2006 Base = Base.getOperand(0); 2007 Offset = CurDAG->getTargetGlobalAddress( 2008 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2009 CombinedOffset, GA->getTargetFlags()); 2010 return true; 2011 } 2012 } 2013 } 2014 2015 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2016 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2017 Offset = CurDAG->getTargetConstant(CVal, DL, VT); 2018 return true; 2019 } 2020 } 2021 2022 // Handle ADD with large immediates. 2023 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2024 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2025 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 2026 2027 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use 2028 // an ADDI for part of the offset and fold the rest into the load/store. 2029 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 2030 if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { 2031 int64_t Adj = CVal < 0 ? 
-2048 : 2047; 2032 Base = SDValue( 2033 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 2034 CurDAG->getTargetConstant(Adj, DL, VT)), 2035 0); 2036 Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); 2037 return true; 2038 } 2039 2040 // For larger immediates, we might be able to save one instruction from 2041 // constant materialization by folding the Lo12 bits of the immediate into 2042 // the address. We should only do this if the ADD is only used by loads and 2043 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled 2044 // separately with the full materialized immediate creating extra 2045 // instructions. 2046 if (isWorthFoldingAdd(Addr) && 2047 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2048 Offset)) { 2049 // Insert an ADD instruction with the materialized Hi52 bits. 2050 Base = SDValue( 2051 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2052 0); 2053 return true; 2054 } 2055 } 2056 2057 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset)) 2058 return true; 2059 2060 Base = Addr; 2061 Offset = CurDAG->getTargetConstant(0, DL, VT); 2062 return true; 2063 } 2064 2065 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 2066 SDValue &ShAmt) { 2067 ShAmt = N; 2068 2069 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 2070 // amount. If there is an AND on the shift amount, we can bypass it if it 2071 // doesn't affect any of those bits. 2072 if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2073 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 2074 2075 // Since the max shift amount is a power of 2 we can subtract 1 to make a 2076 // mask that covers the bits needed to represent all shift amounts. 2077 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 2078 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 2079 2080 if (ShMask.isSubsetOf(AndMask)) { 2081 ShAmt = ShAmt.getOperand(0); 2082 } else { 2083 // SimplifyDemandedBits may have optimized the mask so try restoring any 2084 // bits that are known zero. 2085 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 2086 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 2087 return true; 2088 ShAmt = ShAmt.getOperand(0); 2089 } 2090 } 2091 2092 if (ShAmt.getOpcode() == ISD::ADD && 2093 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 2094 uint64_t Imm = ShAmt.getConstantOperandVal(1); 2095 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 2096 // to avoid the ADD. 2097 if (Imm != 0 && Imm % ShiftWidth == 0) { 2098 ShAmt = ShAmt.getOperand(0); 2099 return true; 2100 } 2101 } else if (ShAmt.getOpcode() == ISD::SUB && 2102 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 2103 uint64_t Imm = ShAmt.getConstantOperandVal(0); 2104 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2105 // generate a NEG instead of a SUB of a constant. 2106 if (Imm != 0 && Imm % ShiftWidth == 0) { 2107 SDLoc DL(ShAmt); 2108 EVT VT = ShAmt.getValueType(); 2109 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 2110 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 2111 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 2112 ShAmt.getOperand(1)); 2113 ShAmt = SDValue(Neg, 0); 2114 return true; 2115 } 2116 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 2117 // to generate a NOT instead of a SUB of a constant. 
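    // For example, with a 32-bit shift width (31 - X) & 31 == (~X) & 31, so a
    // single XORI with -1 replaces materializing 31 and subtracting X.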
    if (Imm % ShiftWidth == ShiftWidth - 1) {
      SDLoc DL(ShAmt);
      EVT VT = ShAmt.getValueType();
      MachineSDNode *Not =
          CurDAG->getMachineNode(RISCV::XORI, DL, VT, ShAmt.getOperand(1),
                                 CurDAG->getTargetConstant(-1, DL, VT));
      ShAmt = SDValue(Not, 0);
      return true;
    }
  }

  return true;
}

bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
  if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
    Val = N.getOperand(0);
    return true;
  }
  MVT VT = N.getSimpleValueType();
  if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
    Val = N;
    return true;
  }

  return false;
}

bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) {
  if (N.getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
      Val = N.getOperand(0);
      return true;
    }
  }
  MVT VT = N.getSimpleValueType();
  APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits);
  if (CurDAG->MaskedValueIsZero(N, Mask)) {
    Val = N;
    return true;
  }

  return false;
}

/// Look for various patterns that can be done with a SHL that can be folded
/// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which
/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
                                       SDValue &Val) {
  if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
    SDValue N0 = N.getOperand(0);

    bool LeftShift = N0.getOpcode() == ISD::SHL;
    if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      uint64_t Mask = N.getConstantOperandVal(1);
      unsigned C2 = N0.getConstantOperandVal(1);

      unsigned XLen = Subtarget->getXLen();
      if (LeftShift)
        Mask &= maskTrailingZeros<uint64_t>(C2);
      else
        Mask &= maskTrailingOnes<uint64_t>(XLen - C2);

      // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
      // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
      // followed by a SHXADD with c3 for the X amount.
      if (isShiftedMask_64(Mask)) {
        unsigned Leading = XLen - llvm::bit_width(Mask);
        unsigned Trailing = countTrailingZeros(Mask);
        if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
          SDLoc DL(N);
          EVT VT = N.getValueType();
          Val = SDValue(CurDAG->getMachineNode(
                            RISCV::SRLI, DL, VT, N0.getOperand(0),
                            CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
                        0);
          return true;
        }
        // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
        // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
        // followed by a SHXADD using c3 for the X amount.
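        // Illustrative example (XLen=64, ShAmt==3): for
        // (and (srl y, 4), 0x0ffffffffffffff8) the mask has c2==4 leading and
        // c3==3 trailing zeros, so this becomes (srli y, 7) feeding the
        // sh3add.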
2203 if (!LeftShift && Leading == C2 && Trailing == ShAmt) { 2204 SDLoc DL(N); 2205 EVT VT = N.getValueType(); 2206 Val = SDValue( 2207 CurDAG->getMachineNode( 2208 RISCV::SRLI, DL, VT, N0.getOperand(0), 2209 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)), 2210 0); 2211 return true; 2212 } 2213 } 2214 } 2215 } 2216 2217 bool LeftShift = N.getOpcode() == ISD::SHL; 2218 if ((LeftShift || N.getOpcode() == ISD::SRL) && 2219 isa<ConstantSDNode>(N.getOperand(1))) { 2220 SDValue N0 = N.getOperand(0); 2221 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 2222 isa<ConstantSDNode>(N0.getOperand(1))) { 2223 uint64_t Mask = N0.getConstantOperandVal(1); 2224 if (isShiftedMask_64(Mask)) { 2225 unsigned C1 = N.getConstantOperandVal(1); 2226 unsigned XLen = Subtarget->getXLen(); 2227 unsigned Leading = XLen - llvm::bit_width(Mask); 2228 unsigned Trailing = countTrailingZeros(Mask); 2229 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 2230 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 2231 if (LeftShift && Leading == 32 && Trailing > 0 && 2232 (Trailing + C1) == ShAmt) { 2233 SDLoc DL(N); 2234 EVT VT = N.getValueType(); 2235 Val = SDValue(CurDAG->getMachineNode( 2236 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2237 CurDAG->getTargetConstant(Trailing, DL, VT)), 2238 0); 2239 return true; 2240 } 2241 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 2242 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 2243 if (!LeftShift && Leading == 32 && Trailing > C1 && 2244 (Trailing - C1) == ShAmt) { 2245 SDLoc DL(N); 2246 EVT VT = N.getValueType(); 2247 Val = SDValue(CurDAG->getMachineNode( 2248 RISCV::SRLIW, DL, VT, N0.getOperand(0), 2249 CurDAG->getTargetConstant(Trailing, DL, VT)), 2250 0); 2251 return true; 2252 } 2253 } 2254 } 2255 } 2256 2257 return false; 2258 } 2259 2260 /// Look for various patterns that can be done with a SHL that can be folded 2261 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 2262 /// SHXADD_UW we are trying to match. 2263 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 2264 SDValue &Val) { 2265 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 2266 N.hasOneUse()) { 2267 SDValue N0 = N.getOperand(0); 2268 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 2269 N0.hasOneUse()) { 2270 uint64_t Mask = N.getConstantOperandVal(1); 2271 unsigned C2 = N0.getConstantOperandVal(1); 2272 2273 Mask &= maskTrailingZeros<uint64_t>(C2); 2274 2275 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 2276 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 2277 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 2278 if (isShiftedMask_64(Mask)) { 2279 unsigned Leading = countLeadingZeros(Mask); 2280 unsigned Trailing = countTrailingZeros(Mask); 2281 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 2282 SDLoc DL(N); 2283 EVT VT = N.getValueType(); 2284 Val = SDValue(CurDAG->getMachineNode( 2285 RISCV::SLLI, DL, VT, N0.getOperand(0), 2286 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 2287 0); 2288 return true; 2289 } 2290 } 2291 } 2292 } 2293 2294 return false; 2295 } 2296 2297 // Return true if all users of this SDNode* only consume the lower \p Bits. 2298 // This can be used to form W instructions for add/sub/mul/shl even when the 2299 // root isn't a sext_inreg. 
This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 2300 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 2301 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 2302 // the add/sub/mul/shl to become non-W instructions. By checking the users we 2303 // may be able to use a W instruction and CSE with the other instruction if 2304 // this has happened. We could try to detect that the CSE opportunity exists 2305 // before doing this, but that would be more complicated. 2306 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, 2307 const unsigned Depth) const { 2308 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB || 2309 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || 2310 Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND || 2311 Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR || 2312 Node->getOpcode() == ISD::SIGN_EXTEND_INREG || 2313 isa<ConstantSDNode>(Node) || Depth != 0) && 2314 "Unexpected opcode"); 2315 2316 if (Depth >= SelectionDAG::MaxRecursionDepth) 2317 return false; 2318 2319 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { 2320 SDNode *User = *UI; 2321 // Users of this node should have already been instruction selected 2322 if (!User->isMachineOpcode()) 2323 return false; 2324 2325 // TODO: Add more opcodes? 2326 switch (User->getMachineOpcode()) { 2327 default: 2328 return false; 2329 case RISCV::ADDW: 2330 case RISCV::ADDIW: 2331 case RISCV::SUBW: 2332 case RISCV::MULW: 2333 case RISCV::SLLW: 2334 case RISCV::SLLIW: 2335 case RISCV::SRAW: 2336 case RISCV::SRAIW: 2337 case RISCV::SRLW: 2338 case RISCV::SRLIW: 2339 case RISCV::DIVW: 2340 case RISCV::DIVUW: 2341 case RISCV::REMW: 2342 case RISCV::REMUW: 2343 case RISCV::ROLW: 2344 case RISCV::RORW: 2345 case RISCV::RORIW: 2346 case RISCV::CLZW: 2347 case RISCV::CTZW: 2348 case RISCV::CPOPW: 2349 case RISCV::SLLI_UW: 2350 case RISCV::FMV_W_X: 2351 case RISCV::FCVT_H_W: 2352 case RISCV::FCVT_H_WU: 2353 case RISCV::FCVT_S_W: 2354 case RISCV::FCVT_S_WU: 2355 case RISCV::FCVT_D_W: 2356 case RISCV::FCVT_D_WU: 2357 if (Bits < 32) 2358 return false; 2359 break; 2360 case RISCV::SLL: 2361 case RISCV::SRA: 2362 case RISCV::SRL: 2363 case RISCV::ROL: 2364 case RISCV::ROR: 2365 case RISCV::BSET: 2366 case RISCV::BCLR: 2367 case RISCV::BINV: 2368 // Shift amount operands only use log2(Xlen) bits. 2369 if (UI.getOperandNo() != 1 || Bits < Log2_32(Subtarget->getXLen())) 2370 return false; 2371 break; 2372 case RISCV::SLLI: 2373 // SLLI only uses the lower (XLen - ShAmt) bits. 
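      // For example, on RV64 "slli rd, rs, 48" only reads bits [15:0] of rs,
      // so it counts as a user of just the low 16 bits.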
      if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
        return false;
      break;
    case RISCV::ANDI:
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as
      // an N-bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits < 8)
        return false;
      break;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits < 16)
        return false;
      break;
    case RISCV::PACK:
      if (Bits < (Subtarget->getXLen() / 2))
        return false;
      break;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    case RISCV::SB:
      if (UI.getOperandNo() != 0 || Bits < 8)
        return false;
      break;
    case RISCV::SH:
      if (UI.getOperandNo() != 0 || Bits < 16)
        return false;
      break;
    case RISCV::SW:
      if (UI.getOperandNo() != 0 || Bits < 32)
        return false;
      break;
    }
  }

  return true;
}

// Select VL as a 5-bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI and VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnesValue()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                   N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
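    // (RISCV::VLMaxSentinel is -1; the insertion pass treats it as a request
    // for VLMAX, typically emitting a vsetvli whose AVL operand is x0.)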
2471 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), 2472 N->getValueType(0)); 2473 } else { 2474 VL = N; 2475 } 2476 2477 return true; 2478 } 2479 2480 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 2481 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) 2482 return false; 2483 assert(N.getNumOperands() == 3 && "Unexpected number of operands"); 2484 SplatVal = N.getOperand(1); 2485 return true; 2486 } 2487 2488 using ValidateFn = bool (*)(int64_t); 2489 2490 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 2491 SelectionDAG &DAG, 2492 const RISCVSubtarget &Subtarget, 2493 ValidateFn ValidateImm) { 2494 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2495 !isa<ConstantSDNode>(N.getOperand(1))) 2496 return false; 2497 assert(N.getNumOperands() == 3 && "Unexpected number of operands"); 2498 2499 int64_t SplatImm = 2500 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2501 2502 // The semantics of RISCVISD::VMV_V_X_VL is that when the operand 2503 // type is wider than the resulting vector element type: an implicit 2504 // truncation first takes place. Therefore, perform a manual 2505 // truncation/sign-extension in order to ignore any truncated bits and catch 2506 // any zero-extended immediate. 2507 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 2508 // sign-extending to (XLenVT -1). 2509 MVT XLenVT = Subtarget.getXLenVT(); 2510 assert(XLenVT == N.getOperand(1).getSimpleValueType() && 2511 "Unexpected splat operand type"); 2512 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 2513 if (EltVT.bitsLT(XLenVT)) 2514 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 2515 2516 if (!ValidateImm(SplatImm)) 2517 return false; 2518 2519 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 2520 return true; 2521 } 2522 2523 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 2524 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 2525 [](int64_t Imm) { return isInt<5>(Imm); }); 2526 } 2527 2528 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 2529 return selectVSplatSimmHelper( 2530 N, SplatVal, *CurDAG, *Subtarget, 2531 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 2532 } 2533 2534 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 2535 SDValue &SplatVal) { 2536 return selectVSplatSimmHelper( 2537 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 2538 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 2539 }); 2540 } 2541 2542 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 2543 if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || 2544 !isa<ConstantSDNode>(N.getOperand(1))) 2545 return false; 2546 2547 int64_t SplatImm = 2548 cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); 2549 2550 if (!isUInt<5>(SplatImm)) 2551 return false; 2552 2553 SplatVal = 2554 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 2555 2556 return true; 2557 } 2558 2559 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 2560 SDValue &Imm) { 2561 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 2562 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width); 2563 2564 if (!isInt<5>(ImmVal)) 2565 return false; 2566 2567 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT()); 2568 return true; 2569 } 2570 2571 return false; 2572 } 2573 2574 // Try to 
remove sext.w if the input is a W instruction or can be made into 2575 // a W instruction cheaply. 2576 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { 2577 // Look for the sext.w pattern, addiw rd, rs1, 0. 2578 if (N->getMachineOpcode() != RISCV::ADDIW || 2579 !isNullConstant(N->getOperand(1))) 2580 return false; 2581 2582 SDValue N0 = N->getOperand(0); 2583 if (!N0.isMachineOpcode()) 2584 return false; 2585 2586 switch (N0.getMachineOpcode()) { 2587 default: 2588 break; 2589 case RISCV::ADD: 2590 case RISCV::ADDI: 2591 case RISCV::SUB: 2592 case RISCV::MUL: 2593 case RISCV::SLLI: { 2594 // Convert sext.w+add/sub/mul to their W instructions. This will create 2595 // a new independent instruction. This improves latency. 2596 unsigned Opc; 2597 switch (N0.getMachineOpcode()) { 2598 default: 2599 llvm_unreachable("Unexpected opcode!"); 2600 case RISCV::ADD: Opc = RISCV::ADDW; break; 2601 case RISCV::ADDI: Opc = RISCV::ADDIW; break; 2602 case RISCV::SUB: Opc = RISCV::SUBW; break; 2603 case RISCV::MUL: Opc = RISCV::MULW; break; 2604 case RISCV::SLLI: Opc = RISCV::SLLIW; break; 2605 } 2606 2607 SDValue N00 = N0.getOperand(0); 2608 SDValue N01 = N0.getOperand(1); 2609 2610 // Shift amount needs to be uimm5. 2611 if (N0.getMachineOpcode() == RISCV::SLLI && 2612 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue())) 2613 break; 2614 2615 SDNode *Result = 2616 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), 2617 N00, N01); 2618 ReplaceUses(N, Result); 2619 return true; 2620 } 2621 case RISCV::ADDW: 2622 case RISCV::ADDIW: 2623 case RISCV::SUBW: 2624 case RISCV::MULW: 2625 case RISCV::SLLIW: 2626 case RISCV::PACKW: 2627 // Result is already sign extended just remove the sext.w. 2628 // NOTE: We only handle the nodes that are selected with hasAllWUsers. 2629 ReplaceUses(N, N0.getNode()); 2630 return true; 2631 } 2632 2633 return false; 2634 } 2635 2636 // Return true if we can make sure mask of N is all-ones mask. 2637 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) { 2638 // Check that we're using V0 as a mask register. 2639 if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || 2640 cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) 2641 return false; 2642 2643 // The glued user defines V0. 2644 const auto *Glued = N->getGluedNode(); 2645 2646 if (!Glued || Glued->getOpcode() != ISD::CopyToReg) 2647 return false; 2648 2649 // Check that we're defining V0 as a mask register. 2650 if (!isa<RegisterSDNode>(Glued->getOperand(1)) || 2651 cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) 2652 return false; 2653 2654 // Check the instruction defining V0; it needs to be a VMSET pseudo. 2655 SDValue MaskSetter = Glued->getOperand(2); 2656 2657 const auto IsVMSet = [](unsigned Opc) { 2658 return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || 2659 Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || 2660 Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || 2661 Opc == RISCV::PseudoVMSET_M_B8; 2662 }; 2663 2664 // TODO: Check that the VMSET is the expected bitwidth? The pseudo has 2665 // undefined behaviour if it's the wrong bitwidth, so we could choose to 2666 // assume that it's all-ones? Same applies to its VL. 2667 return MaskSetter->isMachineOpcode() && 2668 IsVMSet(MaskSetter.getMachineOpcode()); 2669 } 2670 2671 // Optimize masked RVV pseudo instructions with a known all-ones mask to their 2672 // corresponding "unmasked" pseudo versions. 
// The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;

  if (!usesAllOnesMask(N, MaskOpIdx))
    return false;

  // Retrieve the tail policy operand index, if any.
  std::optional<unsigned> TailPolicyOpIdx;
  const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
  const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());

  bool IsTA = true;
  if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
    TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID);
    if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
          RISCVII::TAIL_AGNOSTIC)) {
      // Keep the true-masked instruction when there is no unmasked TU
      // instruction.
      if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
        return false;
      // We can't use TA if the tie-operand is not IMPLICIT_DEF.
      if (!N->getOperand(0).isUndef())
        IsTA = false;
    }
  }

  unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;

  // Check that we're dropping the mask operand and any policy operand
  // when we transform to this unmasked pseudo. Additionally, if this
  // instruction is tail agnostic, the unmasked instruction should not have a
  // merge op.
  uint64_t TSFlags = TII.get(Opc).TSFlags;
  assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
         RISCVII::hasDummyMaskOp(TSFlags) &&
         !RISCVII::hasVecPolicyOp(TSFlags) &&
         "Unexpected pseudo to transform to");
  (void)TSFlags;

  SmallVector<SDValue, 8> Ops;
  // Skip the merge operand at index 0 if IsTA.
  for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask, the policy, and the Glue.
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx || I == TailPolicyOpIdx ||
        Op.getValueType() == MVT::Glue)
      continue;
    Ops.push_back(Op);
  }

  // Transitively apply any node glued to our new node.
  const auto *Glued = N->getGluedNode();
  if (auto *TGlued = Glued->getGluedNode())
    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));

  SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

// Try to fold a VMERGE_VVM with an unmasked intrinsic into a masked intrinsic.
// For now the peephole only deals with a VMERGE_VVM that is TU and whose false
// operand is the same as its true operand, e.g.
// (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...), ...) -> (VADD_VV_M1_MASK)
bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
  unsigned Offset = IsTA ? 0 : 1;
  uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;

  SDValue False = N->getOperand(0 + Offset);
  SDValue True = N->getOperand(1 + Offset);
  SDValue Mask = N->getOperand(2 + Offset);
  SDValue VL = N->getOperand(3 + Offset);

  assert(True.getResNo() == 0 &&
         "Expect True is the first output of an instruction.");

  // N must be the only user of True.
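  // (Otherwise True would remain live for its other users and folding it into
  // the vmerge would not save anything.)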
  if (!True.hasOneUse())
    return false;

  if (!True.isMachineOpcode())
    return false;

  unsigned TrueOpc = True.getMachineOpcode();

  // Skip if True has a merge operand.
  // TODO: Deal with True having the same merge operand as N.
  if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags))
    return false;

  // Skip if True has side effects.
  // TODO: Support vleff and vlsegff.
  if (TII->get(TrueOpc).hasUnmodeledSideEffects())
    return false;

  // For now, only deal with True when it is an unmasked intrinsic.
  const RISCV::RISCVMaskedPseudoInfo *Info =
      RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);

  if (!Info)
    return false;

  // The last operand of an unmasked intrinsic should be SEW or the chain.
  bool HasChainOp =
      True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other;

  if (HasChainOp) {
    // Avoid creating cycles in the DAG. We must ensure that none of the other
    // operands depend on True through its Chain.
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    LoopWorklist.push_back(False.getNode());
    LoopWorklist.push_back(Mask.getNode());
    LoopWorklist.push_back(VL.getNode());
    if (SDNode *Glued = N->getGluedNode())
      LoopWorklist.push_back(Glued);
    if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
      return false;
  }

  // True must have the same VL as N.
  unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
  SDValue TrueVL = True.getOperand(TrueVLIndex);

  auto IsNoFPExcept = [this](SDValue N) {
    return !this->mayRaiseFPException(N.getNode()) ||
           N->getFlags().hasNoFPExcept();
  };

  // Allow the peephole for a non-exception-raising True with VLMAX vector
  // length, since all the values after the VL of N are dependent on Merge.
  // VLMAX should be lowered to (XLenVT -1).
  if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
    return false;

  SDLoc DL(N);
  unsigned MaskedOpc = Info->MaskedPseudo;
  assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
         "Expected instructions with mask have policy operand.");
  assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
         "Expected instructions with mask have merge operand.");

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(False);
  Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
  Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
  Ops.push_back(CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));

  // The result node should take the chain operand of True.
  if (HasChainOp)
    Ops.push_back(True.getOperand(True.getNumOperands() - 1));

  // The result node should take over the glued node of N.
  if (N->getGluedNode())
    Ops.push_back(N->getOperand(N->getNumOperands() - 1));

  SDNode *Result =
      CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
  Result->setFlags(True->getFlags());

  // Replace the vmerge.vvm node with Result.
  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));

  // Replace the other values of True, e.g. the chain and VL.
  for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
    ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));

  // Try to transform Result into an unmasked intrinsic.
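  // If the mask that was folded in is known to be all ones,
  // doPeepholeMaskedRVV can strip it again and pick the unmasked TU/TA pseudo.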
2849 doPeepholeMaskedRVV(Result); 2850 return true; 2851 } 2852 2853 // Transform (VMERGE_VVM_<LMUL>_TU false, false, true, allones, vl, sew) to 2854 // (VADD_VI_<LMUL>_TU false, true, 0, vl, sew). It may decrease uses of VMSET. 2855 bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) { 2856 unsigned NewOpc; 2857 switch (N->getMachineOpcode()) { 2858 default: 2859 llvm_unreachable("Expected VMERGE_VVM_<LMUL>_TU instruction."); 2860 case RISCV::PseudoVMERGE_VVM_MF8_TU: 2861 NewOpc = RISCV::PseudoVADD_VI_MF8_TU; 2862 break; 2863 case RISCV::PseudoVMERGE_VVM_MF4_TU: 2864 NewOpc = RISCV::PseudoVADD_VI_MF4_TU; 2865 break; 2866 case RISCV::PseudoVMERGE_VVM_MF2_TU: 2867 NewOpc = RISCV::PseudoVADD_VI_MF2_TU; 2868 break; 2869 case RISCV::PseudoVMERGE_VVM_M1_TU: 2870 NewOpc = RISCV::PseudoVADD_VI_M1_TU; 2871 break; 2872 case RISCV::PseudoVMERGE_VVM_M2_TU: 2873 NewOpc = RISCV::PseudoVADD_VI_M2_TU; 2874 break; 2875 case RISCV::PseudoVMERGE_VVM_M4_TU: 2876 NewOpc = RISCV::PseudoVADD_VI_M4_TU; 2877 break; 2878 case RISCV::PseudoVMERGE_VVM_M8_TU: 2879 NewOpc = RISCV::PseudoVADD_VI_M8_TU; 2880 break; 2881 } 2882 2883 if (!usesAllOnesMask(N, /* MaskOpIdx */ 3)) 2884 return false; 2885 2886 SDLoc DL(N); 2887 EVT VT = N->getValueType(0); 2888 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), 2889 CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()), 2890 N->getOperand(4), N->getOperand(5)}; 2891 SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops); 2892 ReplaceUses(N, Result); 2893 return true; 2894 } 2895 2896 bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { 2897 bool MadeChange = false; 2898 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 2899 2900 while (Position != CurDAG->allnodes_begin()) { 2901 SDNode *N = &*--Position; 2902 if (N->use_empty() || !N->isMachineOpcode()) 2903 continue; 2904 2905 auto IsVMergeTU = [](unsigned Opcode) { 2906 return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU || 2907 Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU || 2908 Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU || 2909 Opcode == RISCV::PseudoVMERGE_VVM_M1_TU || 2910 Opcode == RISCV::PseudoVMERGE_VVM_M2_TU || 2911 Opcode == RISCV::PseudoVMERGE_VVM_M4_TU || 2912 Opcode == RISCV::PseudoVMERGE_VVM_M8_TU; 2913 }; 2914 2915 auto IsVMergeTA = [](unsigned Opcode) { 2916 return Opcode == RISCV::PseudoVMERGE_VVM_MF8 || 2917 Opcode == RISCV::PseudoVMERGE_VVM_MF4 || 2918 Opcode == RISCV::PseudoVMERGE_VVM_MF2 || 2919 Opcode == RISCV::PseudoVMERGE_VVM_M1 || 2920 Opcode == RISCV::PseudoVMERGE_VVM_M2 || 2921 Opcode == RISCV::PseudoVMERGE_VVM_M4 || 2922 Opcode == RISCV::PseudoVMERGE_VVM_M8; 2923 }; 2924 2925 unsigned Opc = N->getMachineOpcode(); 2926 // The following optimizations require that the merge operand of N is same 2927 // as the false operand of N. 2928 if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) || 2929 IsVMergeTA(Opc)) 2930 MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc)); 2931 if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) 2932 MadeChange |= performVMergeToVAdd(N); 2933 } 2934 return MadeChange; 2935 } 2936 2937 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 2938 // for instruction scheduling. 2939 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 2940 CodeGenOpt::Level OptLevel) { 2941 return new RISCVDAGToDAGISel(TM, OptLevel); 2942 } 2943 2944 char RISCVDAGToDAGISel::ID = 0; 2945 2946 INITIALIZE_PASS(RISCVDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false) 2947