1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISCV target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVMCTargetDesc.h" 15 #include "MCTargetDesc/RISCVMatInt.h" 16 #include "RISCVISelLowering.h" 17 #include "RISCVMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/IR/IntrinsicsRISCV.h" 20 #include "llvm/Support/Alignment.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/KnownBits.h" 23 #include "llvm/Support/MathExtras.h" 24 #include "llvm/Support/raw_ostream.h" 25 26 using namespace llvm; 27 28 #define DEBUG_TYPE "riscv-isel" 29 30 namespace llvm { 31 namespace RISCV { 32 #define GET_RISCVVSSEGTable_IMPL 33 #define GET_RISCVVLSEGTable_IMPL 34 #define GET_RISCVVLXSEGTable_IMPL 35 #define GET_RISCVVSXSEGTable_IMPL 36 #define GET_RISCVVLETable_IMPL 37 #define GET_RISCVVSETable_IMPL 38 #define GET_RISCVVLXTable_IMPL 39 #define GET_RISCVVSXTable_IMPL 40 #include "RISCVGenSearchableTables.inc" 41 } // namespace RISCV 42 } // namespace llvm 43 44 void RISCVDAGToDAGISel::PreprocessISelDAG() { 45 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 46 E = CurDAG->allnodes_end(); 47 I != E;) { 48 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 49 50 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 51 // load. Done after lowering and combining so that we have a chance to 52 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 53 if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) 54 continue; 55 56 assert(N->getNumOperands() == 3 && "Unexpected number of operands"); 57 MVT VT = N->getSimpleValueType(0); 58 SDValue Lo = N->getOperand(0); 59 SDValue Hi = N->getOperand(1); 60 SDValue VL = N->getOperand(2); 61 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 62 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 63 "Unexpected VTs!"); 64 MachineFunction &MF = CurDAG->getMachineFunction(); 65 RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>(); 66 SDLoc DL(N); 67 68 // We use the same frame index we use for moving two i32s into 64-bit FPR. 69 // This is an analogous operation. 
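    // Illustrative sketch of the sequence built below (RV32 + V, not the
    // literal output): the two halves are spilled to the shared slot and a
    // stride-0 vlse64 re-reads them as one 64-bit element per lane:
    //   sw   lo, 0(slot)
    //   sw   hi, 4(slot)
    //   vlse64.v vd, (slot), zero
    // The X0 stride makes every element load from the same address, which is
    // what produces the splat.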
70 int FI = FuncInfo->getMoveF64FrameIndex(MF); 71 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 72 const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); 73 SDValue StackSlot = 74 CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout())); 75 76 SDValue Chain = CurDAG->getEntryNode(); 77 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 78 79 SDValue OffsetSlot = 80 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL); 81 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 82 Align(8)); 83 84 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 85 86 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 87 SDValue IntID = 88 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 89 SDValue Ops[] = {Chain, 90 IntID, 91 CurDAG->getUNDEF(VT), 92 StackSlot, 93 CurDAG->getRegister(RISCV::X0, MVT::i64), 94 VL}; 95 96 SDValue Result = CurDAG->getMemIntrinsicNode( 97 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8), 98 MachineMemOperand::MOLoad); 99 100 // We're about to replace all uses of the SPLAT_VECTOR_SPLIT_I64 with the 101 // vlse we created. This will cause general havok on the dag because 102 // anything below the conversion could be folded into other existing nodes. 103 // To avoid invalidating 'I', back it up to the convert node. 104 --I; 105 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 106 107 // Now that we did that, the node is dead. Increment the iterator to the 108 // next node to process, then delete N. 109 ++I; 110 CurDAG->DeleteNode(N); 111 } 112 } 113 114 void RISCVDAGToDAGISel::PostprocessISelDAG() { 115 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 116 117 bool MadeChange = false; 118 while (Position != CurDAG->allnodes_begin()) { 119 SDNode *N = &*--Position; 120 // Skip dead nodes and any non-machine opcodes. 121 if (N->use_empty() || !N->isMachineOpcode()) 122 continue; 123 124 MadeChange |= doPeepholeSExtW(N); 125 MadeChange |= doPeepholeLoadStoreADDI(N); 126 } 127 128 if (MadeChange) 129 CurDAG->RemoveDeadNodes(); 130 } 131 132 static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, 133 const MVT VT, int64_t Imm, 134 const RISCVSubtarget &Subtarget) { 135 assert(VT == MVT::i64 && "Expecting MVT::i64"); 136 const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); 137 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( 138 ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); 139 SDValue Addr = TLI->getAddr(CP, *CurDAG); 140 SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); 141 // Since there is no data race, the chain can be the entry node. 142 SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, 143 CurDAG->getEntryNode()); 144 MachineFunction &MF = CurDAG->getMachineFunction(); 145 MachineMemOperand *MemOp = MF.getMachineMemOperand( 146 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 147 LLT(VT), CP->getAlign()); 148 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); 149 return Load; 150 } 151 152 static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 153 int64_t Imm, const RISCVSubtarget &Subtarget) { 154 MVT XLenVT = Subtarget.getXLenVT(); 155 RISCVMatInt::InstSeq Seq = 156 RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); 157 158 // If Imm is expensive to build, then we put it into constant pool. 
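  // Illustrative example (assumed RV64 threshold): a constant such as
  // 0x1234567890ABCDEF would otherwise expand into a long LUI/ADDI/SLLI
  // chain; once Seq.size() exceeds getMaxBuildIntsCost(), a single LD from
  // the constant pool (built by selectImmWithConstantPool above) wins.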
159 if (Subtarget.useConstantPoolForLargeInts() && 160 Seq.size() > Subtarget.getMaxBuildIntsCost()) 161 return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); 162 163 SDNode *Result = nullptr; 164 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); 165 for (RISCVMatInt::Inst &Inst : Seq) { 166 SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); 167 if (Inst.Opc == RISCV::LUI) 168 Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); 169 else if (Inst.Opc == RISCV::ADD_UW) 170 Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, 171 CurDAG->getRegister(RISCV::X0, XLenVT)); 172 else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || 173 Inst.Opc == RISCV::SH3ADD) 174 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); 175 else 176 Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); 177 178 // Only the first instruction has X0 as its source. 179 SrcReg = SDValue(Result, 0); 180 } 181 182 return Result; 183 } 184 185 static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 186 unsigned RegClassID, unsigned SubReg0) { 187 assert(Regs.size() >= 2 && Regs.size() <= 8); 188 189 SDLoc DL(Regs[0]); 190 SmallVector<SDValue, 8> Ops; 191 192 Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); 193 194 for (unsigned I = 0; I < Regs.size(); ++I) { 195 Ops.push_back(Regs[I]); 196 Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); 197 } 198 SDNode *N = 199 CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 200 return SDValue(N, 0); 201 } 202 203 static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 204 unsigned NF) { 205 static const unsigned RegClassIDs[] = { 206 RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, 207 RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, 208 RISCV::VRN8M1RegClassID}; 209 210 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); 211 } 212 213 static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 214 unsigned NF) { 215 static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, 216 RISCV::VRN3M2RegClassID, 217 RISCV::VRN4M2RegClassID}; 218 219 return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); 220 } 221 222 static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 223 unsigned NF) { 224 return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, 225 RISCV::sub_vrm4_0); 226 } 227 228 static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, 229 unsigned NF, RISCVII::VLMUL LMUL) { 230 switch (LMUL) { 231 default: 232 llvm_unreachable("Invalid LMUL."); 233 case RISCVII::VLMUL::LMUL_F8: 234 case RISCVII::VLMUL::LMUL_F4: 235 case RISCVII::VLMUL::LMUL_F2: 236 case RISCVII::VLMUL::LMUL_1: 237 return createM1Tuple(CurDAG, Regs, NF); 238 case RISCVII::VLMUL::LMUL_2: 239 return createM2Tuple(CurDAG, Regs, NF); 240 case RISCVII::VLMUL::LMUL_4: 241 return createM4Tuple(CurDAG, Regs, NF); 242 } 243 } 244 245 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 246 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 247 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 248 bool IsLoad, MVT *IndexVT) { 249 SDValue Chain = Node->getOperand(0); 250 SDValue Glue; 251 252 SDValue Base; 253 SelectBaseAddr(Node->getOperand(CurOp++), Base); 254 Operands.push_back(Base); // Base pointer. 
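  // From here on, operands are appended in the fixed order the RVV pseudos
  // expect: optional stride/index operand, optional mask (a CopyToReg into V0
  // plus a V0 register operand), VL, SEW, an optional policy immediate for
  // masked loads, then the chain and, when a mask was copied, its glue.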
255 256 if (IsStridedOrIndexed) { 257 Operands.push_back(Node->getOperand(CurOp++)); // Index. 258 if (IndexVT) 259 *IndexVT = Operands.back()->getSimpleValueType(0); 260 } 261 262 if (IsMasked) { 263 // Mask needs to be copied to V0. 264 SDValue Mask = Node->getOperand(CurOp++); 265 Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue()); 266 Glue = Chain.getValue(1); 267 Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType())); 268 } 269 SDValue VL; 270 selectVLOp(Node->getOperand(CurOp++), VL); 271 Operands.push_back(VL); 272 273 MVT XLenVT = Subtarget->getXLenVT(); 274 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 275 Operands.push_back(SEWOp); 276 277 // Masked load has the tail policy argument. 278 if (IsMasked && IsLoad) { 279 // Policy must be a constant. 280 uint64_t Policy = Node->getConstantOperandVal(CurOp++); 281 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 282 Operands.push_back(PolicyOp); 283 } 284 285 Operands.push_back(Chain); // Chain. 286 if (Glue) 287 Operands.push_back(Glue); 288 } 289 290 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, 291 bool IsStrided) { 292 SDLoc DL(Node); 293 unsigned NF = Node->getNumValues() - 1; 294 MVT VT = Node->getSimpleValueType(0); 295 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 296 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 297 298 unsigned CurOp = 2; 299 SmallVector<SDValue, 8> Operands; 300 if (IsMasked) { 301 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 302 Node->op_begin() + CurOp + NF); 303 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 304 Operands.push_back(MaskedOff); 305 CurOp += NF; 306 } 307 308 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 309 Operands, /*IsLoad=*/true); 310 311 const RISCV::VLSEGPseudo *P = 312 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 313 static_cast<unsigned>(LMUL)); 314 MachineSDNode *Load = 315 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 316 317 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 318 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 319 320 SDValue SuperReg = SDValue(Load, 0); 321 for (unsigned I = 0; I < NF; ++I) { 322 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 323 ReplaceUses(SDValue(Node, I), 324 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 325 } 326 327 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 328 CurDAG->RemoveDeadNode(Node); 329 } 330 331 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { 332 SDLoc DL(Node); 333 unsigned NF = Node->getNumValues() - 2; // Do not count VL and Chain. 
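  // Fault-only-first segment loads also define vl, so besides the NF segment
  // results and the chain, the pseudo below is glued to a PseudoReadVL that
  // recovers the trimmed VL for result number NF.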
334 MVT VT = Node->getSimpleValueType(0); 335 MVT XLenVT = Subtarget->getXLenVT(); 336 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 337 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 338 339 unsigned CurOp = 2; 340 SmallVector<SDValue, 7> Operands; 341 if (IsMasked) { 342 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 343 Node->op_begin() + CurOp + NF); 344 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 345 Operands.push_back(MaskedOff); 346 CurOp += NF; 347 } 348 349 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 350 /*IsStridedOrIndexed*/ false, Operands, 351 /*IsLoad=*/true); 352 353 const RISCV::VLSEGPseudo *P = 354 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 355 Log2SEW, static_cast<unsigned>(LMUL)); 356 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 357 MVT::Other, MVT::Glue, Operands); 358 SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, 359 /*Glue*/ SDValue(Load, 2)); 360 361 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 362 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 363 364 SDValue SuperReg = SDValue(Load, 0); 365 for (unsigned I = 0; I < NF; ++I) { 366 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 367 ReplaceUses(SDValue(Node, I), 368 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 369 } 370 371 ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL 372 ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain 373 CurDAG->RemoveDeadNode(Node); 374 } 375 376 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, 377 bool IsOrdered) { 378 SDLoc DL(Node); 379 unsigned NF = Node->getNumValues() - 1; 380 MVT VT = Node->getSimpleValueType(0); 381 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 382 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 383 384 unsigned CurOp = 2; 385 SmallVector<SDValue, 8> Operands; 386 if (IsMasked) { 387 SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, 388 Node->op_begin() + CurOp + NF); 389 SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); 390 Operands.push_back(MaskedOff); 391 CurOp += NF; 392 } 393 394 MVT IndexVT; 395 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 396 /*IsStridedOrIndexed*/ true, Operands, 397 /*IsLoad=*/true, &IndexVT); 398 399 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 400 "Element count mismatch"); 401 402 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 403 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 404 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 405 report_fatal_error("The V extension does not support EEW=64 for index " 406 "values when XLEN=32"); 407 } 408 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 409 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 410 static_cast<unsigned>(IndexLMUL)); 411 MachineSDNode *Load = 412 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 413 414 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 415 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 416 417 SDValue SuperReg = SDValue(Load, 0); 418 for (unsigned I = 0; I < NF; ++I) { 419 unsigned SubRegIdx = RISCVTargetLowering::getSubregIndexByMVT(VT, I); 420 ReplaceUses(SDValue(Node, I), 421 CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); 422 } 423 424 ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); 425 CurDAG->RemoveDeadNode(Node); 426 } 427 428 void 
RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, bool IsMasked, 429 bool IsStrided) { 430 SDLoc DL(Node); 431 unsigned NF = Node->getNumOperands() - 4; 432 if (IsStrided) 433 NF--; 434 if (IsMasked) 435 NF--; 436 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 437 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 438 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 439 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 440 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 441 442 SmallVector<SDValue, 8> Operands; 443 Operands.push_back(StoreVal); 444 unsigned CurOp = 2 + NF; 445 446 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 447 Operands); 448 449 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 450 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 451 MachineSDNode *Store = 452 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 453 454 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 455 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 456 457 ReplaceNode(Node, Store); 458 } 459 460 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked, 461 bool IsOrdered) { 462 SDLoc DL(Node); 463 unsigned NF = Node->getNumOperands() - 5; 464 if (IsMasked) 465 --NF; 466 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 467 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 468 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 469 SmallVector<SDValue, 8> Regs(Node->op_begin() + 2, Node->op_begin() + 2 + NF); 470 SDValue StoreVal = createTuple(*CurDAG, Regs, NF, LMUL); 471 472 SmallVector<SDValue, 8> Operands; 473 Operands.push_back(StoreVal); 474 unsigned CurOp = 2 + NF; 475 476 MVT IndexVT; 477 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 478 /*IsStridedOrIndexed*/ true, Operands, 479 /*IsLoad=*/false, &IndexVT); 480 481 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 482 "Element count mismatch"); 483 484 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 485 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 486 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 487 report_fatal_error("The V extension does not support EEW=64 for index " 488 "values when XLEN=32"); 489 } 490 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 491 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 492 static_cast<unsigned>(IndexLMUL)); 493 MachineSDNode *Store = 494 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 495 496 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 497 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 498 499 ReplaceNode(Node, Store); 500 } 501 502 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 503 if (!Subtarget->hasVInstructions()) 504 return; 505 506 assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN || 507 Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) && 508 "Unexpected opcode"); 509 510 SDLoc DL(Node); 511 MVT XLenVT = Subtarget->getXLenVT(); 512 513 bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN; 514 unsigned IntNoOffset = HasChain ? 
1 : 0; 515 unsigned IntNo = Node->getConstantOperandVal(IntNoOffset); 516 517 assert((IntNo == Intrinsic::riscv_vsetvli || 518 IntNo == Intrinsic::riscv_vsetvlimax || 519 IntNo == Intrinsic::riscv_vsetvli_opt || 520 IntNo == Intrinsic::riscv_vsetvlimax_opt) && 521 "Unexpected vsetvli intrinsic"); 522 523 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax || 524 IntNo == Intrinsic::riscv_vsetvlimax_opt; 525 unsigned Offset = IntNoOffset + (VLMax ? 1 : 2); 526 527 assert(Node->getNumOperands() == Offset + 2 && 528 "Unexpected number of operands"); 529 530 unsigned SEW = 531 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 532 RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>( 533 Node->getConstantOperandVal(Offset + 1) & 0x7); 534 535 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 536 /*MaskAgnostic*/ false); 537 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 538 539 SmallVector<EVT, 2> VTs = {XLenVT}; 540 if (HasChain) 541 VTs.push_back(MVT::Other); 542 543 SDValue VLOperand; 544 unsigned Opcode = RISCV::PseudoVSETVLI; 545 if (VLMax) { 546 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 547 Opcode = RISCV::PseudoVSETVLIX0; 548 } else { 549 VLOperand = Node->getOperand(IntNoOffset + 1); 550 551 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 552 uint64_t AVL = C->getZExtValue(); 553 if (isUInt<5>(AVL)) { 554 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 555 SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp}; 556 if (HasChain) 557 Ops.push_back(Node->getOperand(0)); 558 ReplaceNode( 559 Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops)); 560 return; 561 } 562 } 563 } 564 565 SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp}; 566 if (HasChain) 567 Ops.push_back(Node->getOperand(0)); 568 569 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops)); 570 } 571 572 void RISCVDAGToDAGISel::Select(SDNode *Node) { 573 // If we have a custom node, we have already selected. 574 if (Node->isMachineOpcode()) { 575 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 576 Node->setNodeId(-1); 577 return; 578 } 579 580 // Instruction Selection not handled by the auto-generated tablegen selection 581 // should be handled here. 582 unsigned Opcode = Node->getOpcode(); 583 MVT XLenVT = Subtarget->getXLenVT(); 584 SDLoc DL(Node); 585 MVT VT = Node->getSimpleValueType(0); 586 587 switch (Opcode) { 588 case ISD::Constant: { 589 auto *ConstNode = cast<ConstantSDNode>(Node); 590 if (VT == XLenVT && ConstNode->isZero()) { 591 SDValue New = 592 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT); 593 ReplaceNode(Node, New.getNode()); 594 return; 595 } 596 int64_t Imm = ConstNode->getSExtValue(); 597 // If the upper XLen-16 bits are not used, try to convert this to a simm12 598 // by sign extending bit 15. 599 if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && 600 hasAllHUsers(Node)) 601 Imm = SignExtend64(Imm, 16); 602 // If the upper 32-bits are not used try to convert this into a simm32 by 603 // sign extending bit 32. 
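    // Illustrative example: Imm = 0xFFFFFFFF whose users all only read the
    // low 32 bits becomes -1 here, which selectImm can then materialize with
    // a single ADDI instead of a longer shift/or sequence.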
    if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
      Imm = SignExtend64(Imm, 32);

    ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
    return;
  }
  case ISD::FrameIndex: {
    SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
    ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
    return;
  }
  case ISD::SRL: {
    // Optimize (srl (and X, C2), C) ->
    //          (srli (slli X, (XLen-C3)), (XLen-C3) + C)
    // Where C2 is a mask with C3 trailing ones.
    // Taking into account that the C2 may have had lower bits unset by
    // SimplifyDemandedBits. This avoids materializing the C2 immediate.
    // This pattern occurs when type legalizing right shifts for types with
    // less than XLen bits.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
        !isa<ConstantSDNode>(N0.getOperand(1)))
      break;
    unsigned ShAmt = N1C->getZExtValue();
    uint64_t Mask = N0.getConstantOperandVal(1);
    Mask |= maskTrailingOnes<uint64_t>(ShAmt);
    if (!isMask_64(Mask))
      break;
    unsigned TrailingOnes = countTrailingOnes(Mask);
    // 32 trailing ones should use srliw via tablegen pattern.
    if (TrailingOnes == 32 || ShAmt >= TrailingOnes)
      break;
    unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
    SDNode *SLLI =
        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
                               CurDAG->getTargetConstant(LShAmt, DL, VT));
    SDNode *SRLI = CurDAG->getMachineNode(
        RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
    ReplaceNode(Node, SRLI);
    return;
  }
  case ISD::SRA: {
    // Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
    // And (sra (sext_inreg X, i8), C) ->
    //     (srai (slli X, (XLen-8)), (XLen-8) + C)
    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
    // This transform matches the code we get without Zbb. The shifts are more
    // compressible, and this can help expose CSE opportunities in the sdiv by
    // constant optimization.
    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
    if (!N1C)
      break;
    SDValue N0 = Node->getOperand(0);
    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
      break;
    unsigned ShAmt = N1C->getZExtValue();
    unsigned ExtSize =
        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
    // ExtSize of 32 should use sraiw via tablegen pattern.
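    // Illustrative RV64 example: (sra (sext_inreg X, i8), 3) is selected as
    //   slli X, 56 ; srai 59
    // i.e. shift amounts of (XLen-8) and (XLen-8)+3.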
670 if (ExtSize >= 32 || ShAmt >= ExtSize) 671 break; 672 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 673 SDNode *SLLI = 674 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 675 CurDAG->getTargetConstant(LShAmt, DL, VT)); 676 SDNode *SRAI = CurDAG->getMachineNode( 677 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 678 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 679 ReplaceNode(Node, SRAI); 680 return; 681 } 682 case ISD::AND: { 683 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 684 if (!N1C) 685 break; 686 687 SDValue N0 = Node->getOperand(0); 688 689 bool LeftShift = N0.getOpcode() == ISD::SHL; 690 if (!LeftShift && N0.getOpcode() != ISD::SRL) 691 break; 692 693 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 694 if (!C) 695 break; 696 uint64_t C2 = C->getZExtValue(); 697 unsigned XLen = Subtarget->getXLen(); 698 if (!C2 || C2 >= XLen) 699 break; 700 701 uint64_t C1 = N1C->getZExtValue(); 702 703 // Keep track of whether this is a andi, zext.h, or zext.w. 704 bool ZExtOrANDI = isInt<12>(N1C->getSExtValue()); 705 if (C1 == UINT64_C(0xFFFF) && 706 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) 707 ZExtOrANDI = true; 708 if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()) 709 ZExtOrANDI = true; 710 711 // Clear irrelevant bits in the mask. 712 if (LeftShift) 713 C1 &= maskTrailingZeros<uint64_t>(C2); 714 else 715 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 716 717 // Some transforms should only be done if the shift has a single use or 718 // the AND would become (srli (slli X, 32), 32) 719 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 720 721 SDValue X = N0.getOperand(0); 722 723 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 724 // with c3 leading zeros. 725 if (!LeftShift && isMask_64(C1)) { 726 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 727 if (C2 < C3) { 728 // If the number of leading zeros is C2+32 this can be SRLIW. 729 if (C2 + 32 == C3) { 730 SDNode *SRLIW = 731 CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, 732 CurDAG->getTargetConstant(C2, DL, XLenVT)); 733 ReplaceNode(Node, SRLIW); 734 return; 735 } 736 737 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if 738 // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 739 // 740 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 741 // legalized and goes through DAG combine. 742 SDValue Y; 743 if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && 744 selectSExti32(X, Y)) { 745 SDNode *SRAIW = 746 CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y, 747 CurDAG->getTargetConstant(31, DL, XLenVT)); 748 SDNode *SRLIW = CurDAG->getMachineNode( 749 RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), 750 CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); 751 ReplaceNode(Node, SRLIW); 752 return; 753 } 754 755 // (srli (slli x, c3-c2), c3). 756 if (OneUseOrZExtW && !ZExtOrANDI) { 757 SDNode *SLLI = CurDAG->getMachineNode( 758 RISCV::SLLI, DL, XLenVT, X, 759 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 760 SDNode *SRLI = 761 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 762 CurDAG->getTargetConstant(C3, DL, XLenVT)); 763 ReplaceNode(Node, SRLI); 764 return; 765 } 766 } 767 } 768 769 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 770 // shifted by c2 bits with c3 leading zeros. 
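    // Illustrative RV64 example: (and (shl x, 8), 0x00FFFFFFFFFFFF00) has
    // c2 = 8 and c3 = 8 leading zeros, so it becomes
    //   slli x, 16 ; srli 8
    // and the 0x00FFFFFFFFFFFF00 mask never has to be materialized.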
771 if (LeftShift && isShiftedMask_64(C1)) { 772 uint64_t C3 = XLen - (64 - countLeadingZeros(C1)); 773 774 if (C2 + C3 < XLen && 775 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { 776 // Use slli.uw when possible. 777 if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { 778 SDNode *SLLI_UW = 779 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, 780 CurDAG->getTargetConstant(C2, DL, XLenVT)); 781 ReplaceNode(Node, SLLI_UW); 782 return; 783 } 784 785 // (srli (slli c2+c3), c3) 786 if (OneUseOrZExtW && !ZExtOrANDI) { 787 SDNode *SLLI = CurDAG->getMachineNode( 788 RISCV::SLLI, DL, XLenVT, X, 789 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 790 SDNode *SRLI = 791 CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), 792 CurDAG->getTargetConstant(C3, DL, XLenVT)); 793 ReplaceNode(Node, SRLI); 794 return; 795 } 796 } 797 } 798 799 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 800 // shifted mask with c2 leading zeros and c3 trailing zeros. 801 if (!LeftShift && isShiftedMask_64(C1)) { 802 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 803 uint64_t C3 = countTrailingZeros(C1); 804 if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) { 805 SDNode *SRLI = CurDAG->getMachineNode( 806 RISCV::SRLI, DL, XLenVT, X, 807 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 808 SDNode *SLLI = 809 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 810 CurDAG->getTargetConstant(C3, DL, XLenVT)); 811 ReplaceNode(Node, SLLI); 812 return; 813 } 814 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 815 if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && 816 OneUseOrZExtW && !ZExtOrANDI) { 817 SDNode *SRLIW = CurDAG->getMachineNode( 818 RISCV::SRLIW, DL, XLenVT, X, 819 CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); 820 SDNode *SLLI = 821 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 822 CurDAG->getTargetConstant(C3, DL, XLenVT)); 823 ReplaceNode(Node, SLLI); 824 return; 825 } 826 } 827 828 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 829 // shifted mask with no leading zeros and c3 trailing zeros. 830 if (LeftShift && isShiftedMask_64(C1)) { 831 uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); 832 uint64_t C3 = countTrailingZeros(C1); 833 if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) { 834 SDNode *SRLI = CurDAG->getMachineNode( 835 RISCV::SRLI, DL, XLenVT, X, 836 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 837 SDNode *SLLI = 838 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), 839 CurDAG->getTargetConstant(C3, DL, XLenVT)); 840 ReplaceNode(Node, SLLI); 841 return; 842 } 843 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 844 if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) { 845 SDNode *SRLIW = CurDAG->getMachineNode( 846 RISCV::SRLIW, DL, XLenVT, X, 847 CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); 848 SDNode *SLLI = 849 CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), 850 CurDAG->getTargetConstant(C3, DL, XLenVT)); 851 ReplaceNode(Node, SLLI); 852 return; 853 } 854 } 855 856 break; 857 } 858 case ISD::MUL: { 859 // Special case for calculating (mul (and X, C2), C1) where the full product 860 // fits in XLen bits. We can shift X left by the number of leading zeros in 861 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 862 // product has XLen trailing zeros, putting it in the output of MULHU. 
This 863 // can avoid materializing a constant in a register for C2. 864 865 // RHS should be a constant. 866 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 867 if (!N1C || !N1C->hasOneUse()) 868 break; 869 870 // LHS should be an AND with constant. 871 SDValue N0 = Node->getOperand(0); 872 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 873 break; 874 875 uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 876 877 // Constant should be a mask. 878 if (!isMask_64(C2)) 879 break; 880 881 // This should be the only use of the AND unless we will use 882 // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND 883 // constants. 884 if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) 885 break; 886 887 // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this 888 // optimization. 889 if (isInt<12>(C2) || 890 (C2 == UINT64_C(0xFFFF) && 891 (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || 892 (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) 893 break; 894 895 // We need to shift left the AND input and C1 by a total of XLen bits. 896 897 // How far left do we need to shift the AND input? 898 unsigned XLen = Subtarget->getXLen(); 899 unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); 900 901 // The constant gets shifted by the remaining amount unless that would 902 // shift bits out. 903 uint64_t C1 = N1C->getZExtValue(); 904 unsigned ConstantShift = XLen - LeadingZeros; 905 if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) 906 break; 907 908 uint64_t ShiftedC1 = C1 << ConstantShift; 909 // If this RV32, we need to sign extend the constant. 910 if (XLen == 32) 911 ShiftedC1 = SignExtend64(ShiftedC1, 32); 912 913 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 914 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); 915 SDNode *SLLI = 916 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 917 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 918 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 919 SDValue(SLLI, 0), SDValue(Imm, 0)); 920 ReplaceNode(Node, MULHU); 921 return; 922 } 923 case ISD::INTRINSIC_WO_CHAIN: { 924 unsigned IntNo = Node->getConstantOperandVal(0); 925 switch (IntNo) { 926 // By default we do not custom select any intrinsic. 927 default: 928 break; 929 case Intrinsic::riscv_vmsgeu: 930 case Intrinsic::riscv_vmsge: { 931 SDValue Src1 = Node->getOperand(1); 932 SDValue Src2 = Node->getOperand(2); 933 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 934 bool IsCmpUnsignedZero = false; 935 // Only custom select scalar second operand. 936 if (Src2.getValueType() != XLenVT) 937 break; 938 // Small constants are handled with patterns. 939 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 940 int64_t CVal = C->getSExtValue(); 941 if (CVal >= -15 && CVal <= 16) { 942 if (!IsUnsigned || CVal != 0) 943 break; 944 IsCmpUnsignedZero = true; 945 } 946 } 947 MVT Src1VT = Src1.getSimpleValueType(); 948 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode; 949 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 950 default: 951 llvm_unreachable("Unexpected LMUL!"); 952 #define CASE_VMSLT_VMNAND_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 953 case RISCVII::VLMUL::lmulenum: \ 954 VMSLTOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix \ 955 : RISCV::PseudoVMSLT_VX_##suffix; \ 956 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 957 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 958 break; 959 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F8, MF8, B1) 960 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F4, MF4, B2) 961 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_F2, MF2, B4) 962 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_1, M1, B8) 963 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_2, M2, B16) 964 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_4, M4, B32) 965 CASE_VMSLT_VMNAND_VMSET_OPCODES(LMUL_8, M8, B64) 966 #undef CASE_VMSLT_VMNAND_VMSET_OPCODES 967 } 968 SDValue SEW = CurDAG->getTargetConstant( 969 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 970 SDValue VL; 971 selectVLOp(Node->getOperand(3), VL); 972 973 // If vmsgeu with 0 immediate, expand it to vmset. 974 if (IsCmpUnsignedZero) { 975 ReplaceNode(Node, CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW)); 976 return; 977 } 978 979 // Expand to 980 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 981 SDValue Cmp = SDValue( 982 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 983 0); 984 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 985 {Cmp, Cmp, VL, SEW})); 986 return; 987 } 988 case Intrinsic::riscv_vmsgeu_mask: 989 case Intrinsic::riscv_vmsge_mask: { 990 SDValue Src1 = Node->getOperand(2); 991 SDValue Src2 = Node->getOperand(3); 992 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 993 bool IsCmpUnsignedZero = false; 994 // Only custom select scalar second operand. 995 if (Src2.getValueType() != XLenVT) 996 break; 997 // Small constants are handled with patterns. 998 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 999 int64_t CVal = C->getSExtValue(); 1000 if (CVal >= -15 && CVal <= 16) { 1001 if (!IsUnsigned || CVal != 0) 1002 break; 1003 IsCmpUnsignedZero = true; 1004 } 1005 } 1006 MVT Src1VT = Src1.getSimpleValueType(); 1007 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1008 VMSetOpcode, VMANDOpcode; 1009 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1010 default: 1011 llvm_unreachable("Unexpected LMUL!"); 1012 #define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ 1013 case RISCVII::VLMUL::lmulenum: \ 1014 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1015 : RISCV::PseudoVMSLT_VX_##suffix; \ 1016 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1017 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1018 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ 1019 break; 1020 CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) 1021 CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) 1022 CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) 1023 CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) 1024 CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) 1025 CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) 1026 CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) 1027 #undef CASE_VMSLT_VMSET_OPCODES 1028 } 1029 // Mask operations use the LMUL from the mask type. 
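      // The compare pseudos above were keyed off Src1VT's LMUL; the
      // vmxor/vmandn/vmand glue below operates on the result mask value, so
      // its suffix comes from VT's LMUL instead.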
1030 switch (RISCVTargetLowering::getLMUL(VT)) { 1031 default: 1032 llvm_unreachable("Unexpected LMUL!"); 1033 #define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ 1034 case RISCVII::VLMUL::lmulenum: \ 1035 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 1036 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 1037 VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ 1038 break; 1039 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) 1040 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) 1041 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) 1042 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) 1043 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) 1044 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) 1045 CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) 1046 #undef CASE_VMXOR_VMANDN_VMAND_OPCODES 1047 } 1048 SDValue SEW = CurDAG->getTargetConstant( 1049 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1050 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1051 SDValue VL; 1052 selectVLOp(Node->getOperand(5), VL); 1053 SDValue MaskedOff = Node->getOperand(1); 1054 SDValue Mask = Node->getOperand(4); 1055 1056 // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. 1057 if (IsCmpUnsignedZero) { 1058 SDValue VMSet = 1059 SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); 1060 ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, 1061 {Mask, VMSet, VL, MaskSEW})); 1062 return; 1063 } 1064 1065 // If the MaskedOff value and the Mask are the same value use 1066 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 1067 // This avoids needing to copy v0 to vd before starting the next sequence. 1068 if (Mask == MaskedOff) { 1069 SDValue Cmp = SDValue( 1070 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1071 0); 1072 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 1073 {Mask, Cmp, VL, MaskSEW})); 1074 return; 1075 } 1076 1077 // Mask needs to be copied to V0. 1078 SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, 1079 RISCV::V0, Mask, SDValue()); 1080 SDValue Glue = Chain.getValue(1); 1081 SDValue V0 = CurDAG->getRegister(RISCV::V0, VT); 1082 1083 // Otherwise use 1084 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 1085 SDValue Cmp = SDValue( 1086 CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 1087 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 1088 0); 1089 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 1090 {Cmp, Mask, VL, MaskSEW})); 1091 return; 1092 } 1093 case Intrinsic::riscv_vsetvli_opt: 1094 case Intrinsic::riscv_vsetvlimax_opt: 1095 return selectVSETVLI(Node); 1096 } 1097 break; 1098 } 1099 case ISD::INTRINSIC_W_CHAIN: { 1100 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 1101 switch (IntNo) { 1102 // By default we do not custom select any intrinsic. 
1103 default: 1104 break; 1105 case Intrinsic::riscv_vsetvli: 1106 case Intrinsic::riscv_vsetvlimax: 1107 return selectVSETVLI(Node); 1108 case Intrinsic::riscv_vlseg2: 1109 case Intrinsic::riscv_vlseg3: 1110 case Intrinsic::riscv_vlseg4: 1111 case Intrinsic::riscv_vlseg5: 1112 case Intrinsic::riscv_vlseg6: 1113 case Intrinsic::riscv_vlseg7: 1114 case Intrinsic::riscv_vlseg8: { 1115 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false); 1116 return; 1117 } 1118 case Intrinsic::riscv_vlseg2_mask: 1119 case Intrinsic::riscv_vlseg3_mask: 1120 case Intrinsic::riscv_vlseg4_mask: 1121 case Intrinsic::riscv_vlseg5_mask: 1122 case Intrinsic::riscv_vlseg6_mask: 1123 case Intrinsic::riscv_vlseg7_mask: 1124 case Intrinsic::riscv_vlseg8_mask: { 1125 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false); 1126 return; 1127 } 1128 case Intrinsic::riscv_vlsseg2: 1129 case Intrinsic::riscv_vlsseg3: 1130 case Intrinsic::riscv_vlsseg4: 1131 case Intrinsic::riscv_vlsseg5: 1132 case Intrinsic::riscv_vlsseg6: 1133 case Intrinsic::riscv_vlsseg7: 1134 case Intrinsic::riscv_vlsseg8: { 1135 selectVLSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1136 return; 1137 } 1138 case Intrinsic::riscv_vlsseg2_mask: 1139 case Intrinsic::riscv_vlsseg3_mask: 1140 case Intrinsic::riscv_vlsseg4_mask: 1141 case Intrinsic::riscv_vlsseg5_mask: 1142 case Intrinsic::riscv_vlsseg6_mask: 1143 case Intrinsic::riscv_vlsseg7_mask: 1144 case Intrinsic::riscv_vlsseg8_mask: { 1145 selectVLSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1146 return; 1147 } 1148 case Intrinsic::riscv_vloxseg2: 1149 case Intrinsic::riscv_vloxseg3: 1150 case Intrinsic::riscv_vloxseg4: 1151 case Intrinsic::riscv_vloxseg5: 1152 case Intrinsic::riscv_vloxseg6: 1153 case Intrinsic::riscv_vloxseg7: 1154 case Intrinsic::riscv_vloxseg8: 1155 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1156 return; 1157 case Intrinsic::riscv_vluxseg2: 1158 case Intrinsic::riscv_vluxseg3: 1159 case Intrinsic::riscv_vluxseg4: 1160 case Intrinsic::riscv_vluxseg5: 1161 case Intrinsic::riscv_vluxseg6: 1162 case Intrinsic::riscv_vluxseg7: 1163 case Intrinsic::riscv_vluxseg8: 1164 selectVLXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1165 return; 1166 case Intrinsic::riscv_vloxseg2_mask: 1167 case Intrinsic::riscv_vloxseg3_mask: 1168 case Intrinsic::riscv_vloxseg4_mask: 1169 case Intrinsic::riscv_vloxseg5_mask: 1170 case Intrinsic::riscv_vloxseg6_mask: 1171 case Intrinsic::riscv_vloxseg7_mask: 1172 case Intrinsic::riscv_vloxseg8_mask: 1173 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1174 return; 1175 case Intrinsic::riscv_vluxseg2_mask: 1176 case Intrinsic::riscv_vluxseg3_mask: 1177 case Intrinsic::riscv_vluxseg4_mask: 1178 case Intrinsic::riscv_vluxseg5_mask: 1179 case Intrinsic::riscv_vluxseg6_mask: 1180 case Intrinsic::riscv_vluxseg7_mask: 1181 case Intrinsic::riscv_vluxseg8_mask: 1182 selectVLXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1183 return; 1184 case Intrinsic::riscv_vlseg8ff: 1185 case Intrinsic::riscv_vlseg7ff: 1186 case Intrinsic::riscv_vlseg6ff: 1187 case Intrinsic::riscv_vlseg5ff: 1188 case Intrinsic::riscv_vlseg4ff: 1189 case Intrinsic::riscv_vlseg3ff: 1190 case Intrinsic::riscv_vlseg2ff: { 1191 selectVLSEGFF(Node, /*IsMasked*/ false); 1192 return; 1193 } 1194 case Intrinsic::riscv_vlseg8ff_mask: 1195 case Intrinsic::riscv_vlseg7ff_mask: 1196 case Intrinsic::riscv_vlseg6ff_mask: 1197 case Intrinsic::riscv_vlseg5ff_mask: 1198 case Intrinsic::riscv_vlseg4ff_mask: 1199 case Intrinsic::riscv_vlseg3ff_mask: 1200 case 
Intrinsic::riscv_vlseg2ff_mask: {
      selectVLSEGFF(Node, /*IsMasked*/ true);
      return;
    }
    case Intrinsic::riscv_vloxei:
    case Intrinsic::riscv_vloxei_mask:
    case Intrinsic::riscv_vluxei:
    case Intrinsic::riscv_vluxei_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vloxei_mask ||
                      IntNo == Intrinsic::riscv_vluxei_mask;
      bool IsOrdered = IntNo == Intrinsic::riscv_vloxei ||
                       IntNo == Intrinsic::riscv_vloxei_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      MVT IndexVT;
      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ true, Operands,
                                 /*IsLoad=*/true, &IndexVT);

      assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
             "Element count mismatch");

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT);
      unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits());
      if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) {
        report_fatal_error("The V extension does not support EEW=64 for index "
                           "values when XLEN=32");
      }
      const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
          IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
          static_cast<unsigned>(IndexLMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vlm:
    case Intrinsic::riscv_vle:
    case Intrinsic::riscv_vle_mask:
    case Intrinsic::riscv_vlse:
    case Intrinsic::riscv_vlse_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vle_mask ||
                      IntNo == Intrinsic::riscv_vlse_mask;
      bool IsStrided =
          IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // The riscv_vlm intrinsic is always tail agnostic and has no passthru
      // operand.
      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
      // Masked intrinsics only have TU version pseudo instructions.
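      // So a TU pseudo is picked when the intrinsic is masked or when an
      // unmasked intrinsic carries a non-undef passthru; an undef passthru
      // keeps the TA form and the operand is simply skipped below.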
      bool IsTU =
          HasPassthruOperand &&
          ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
      SmallVector<SDValue, 8> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else if (HasPassthruOperand)
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                 Operands, /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                              static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceNode(Node, Load);
      return;
    }
    case Intrinsic::riscv_vleff:
    case Intrinsic::riscv_vleff_mask: {
      bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask;

      MVT VT = Node->getSimpleValueType(0);
      unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());

      unsigned CurOp = 2;
      // Masked intrinsics only have TU version pseudo instructions.
      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
      SmallVector<SDValue, 7> Operands;
      if (IsTU)
        Operands.push_back(Node->getOperand(CurOp++));
      else
        // Skip the undef passthru operand for nomask TA version pseudo.
        CurOp++;

      addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                 /*IsStridedOrIndexed*/ false, Operands,
                                 /*IsLoad=*/true);

      RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
      const RISCV::VLEPseudo *P =
          RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
                              Log2SEW, static_cast<unsigned>(LMUL));
      MachineSDNode *Load =
          CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
                                 MVT::Other, MVT::Glue, Operands);
      SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
                                              /*Glue*/ SDValue(Load, 2));

      if (auto *MemOp = dyn_cast<MemSDNode>(Node))
        CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});

      ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
      ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
      ReplaceUses(SDValue(Node, 2), SDValue(Load, 1));   // Chain
      CurDAG->RemoveDeadNode(Node);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
    switch (IntNo) {
    case Intrinsic::riscv_vsseg2:
    case Intrinsic::riscv_vsseg3:
    case Intrinsic::riscv_vsseg4:
    case Intrinsic::riscv_vsseg5:
    case Intrinsic::riscv_vsseg6:
    case Intrinsic::riscv_vsseg7:
    case Intrinsic::riscv_vsseg8: {
      selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vsseg2_mask:
    case Intrinsic::riscv_vsseg3_mask:
    case Intrinsic::riscv_vsseg4_mask:
    case Intrinsic::riscv_vsseg5_mask:
    case Intrinsic::riscv_vsseg6_mask:
    case Intrinsic::riscv_vsseg7_mask:
    case Intrinsic::riscv_vsseg8_mask: {
      selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ false);
      return;
    }
    case Intrinsic::riscv_vssseg2:
    case Intrinsic::riscv_vssseg3:
    case Intrinsic::riscv_vssseg4:
    case Intrinsic::riscv_vssseg5:
    case Intrinsic::riscv_vssseg6:
case Intrinsic::riscv_vssseg7: 1368 case Intrinsic::riscv_vssseg8: { 1369 selectVSSEG(Node, /*IsMasked*/ false, /*IsStrided*/ true); 1370 return; 1371 } 1372 case Intrinsic::riscv_vssseg2_mask: 1373 case Intrinsic::riscv_vssseg3_mask: 1374 case Intrinsic::riscv_vssseg4_mask: 1375 case Intrinsic::riscv_vssseg5_mask: 1376 case Intrinsic::riscv_vssseg6_mask: 1377 case Intrinsic::riscv_vssseg7_mask: 1378 case Intrinsic::riscv_vssseg8_mask: { 1379 selectVSSEG(Node, /*IsMasked*/ true, /*IsStrided*/ true); 1380 return; 1381 } 1382 case Intrinsic::riscv_vsoxseg2: 1383 case Intrinsic::riscv_vsoxseg3: 1384 case Intrinsic::riscv_vsoxseg4: 1385 case Intrinsic::riscv_vsoxseg5: 1386 case Intrinsic::riscv_vsoxseg6: 1387 case Intrinsic::riscv_vsoxseg7: 1388 case Intrinsic::riscv_vsoxseg8: 1389 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ true); 1390 return; 1391 case Intrinsic::riscv_vsuxseg2: 1392 case Intrinsic::riscv_vsuxseg3: 1393 case Intrinsic::riscv_vsuxseg4: 1394 case Intrinsic::riscv_vsuxseg5: 1395 case Intrinsic::riscv_vsuxseg6: 1396 case Intrinsic::riscv_vsuxseg7: 1397 case Intrinsic::riscv_vsuxseg8: 1398 selectVSXSEG(Node, /*IsMasked*/ false, /*IsOrdered*/ false); 1399 return; 1400 case Intrinsic::riscv_vsoxseg2_mask: 1401 case Intrinsic::riscv_vsoxseg3_mask: 1402 case Intrinsic::riscv_vsoxseg4_mask: 1403 case Intrinsic::riscv_vsoxseg5_mask: 1404 case Intrinsic::riscv_vsoxseg6_mask: 1405 case Intrinsic::riscv_vsoxseg7_mask: 1406 case Intrinsic::riscv_vsoxseg8_mask: 1407 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ true); 1408 return; 1409 case Intrinsic::riscv_vsuxseg2_mask: 1410 case Intrinsic::riscv_vsuxseg3_mask: 1411 case Intrinsic::riscv_vsuxseg4_mask: 1412 case Intrinsic::riscv_vsuxseg5_mask: 1413 case Intrinsic::riscv_vsuxseg6_mask: 1414 case Intrinsic::riscv_vsuxseg7_mask: 1415 case Intrinsic::riscv_vsuxseg8_mask: 1416 selectVSXSEG(Node, /*IsMasked*/ true, /*IsOrdered*/ false); 1417 return; 1418 case Intrinsic::riscv_vsoxei: 1419 case Intrinsic::riscv_vsoxei_mask: 1420 case Intrinsic::riscv_vsuxei: 1421 case Intrinsic::riscv_vsuxei_mask: { 1422 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 1423 IntNo == Intrinsic::riscv_vsuxei_mask; 1424 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 1425 IntNo == Intrinsic::riscv_vsoxei_mask; 1426 1427 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1428 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1429 1430 unsigned CurOp = 2; 1431 SmallVector<SDValue, 8> Operands; 1432 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
1433 1434 MVT IndexVT; 1435 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 1436 /*IsStridedOrIndexed*/ true, Operands, 1437 /*IsLoad=*/false, &IndexVT); 1438 1439 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 1440 "Element count mismatch"); 1441 1442 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1443 RISCVII::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 1444 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 1445 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 1446 report_fatal_error("The V extension does not support EEW=64 for index " 1447 "values when XLEN=32"); 1448 } 1449 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 1450 IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, 1451 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 1452 MachineSDNode *Store = 1453 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1454 1455 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1456 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1457 1458 ReplaceNode(Node, Store); 1459 return; 1460 } 1461 case Intrinsic::riscv_vsm: 1462 case Intrinsic::riscv_vse: 1463 case Intrinsic::riscv_vse_mask: 1464 case Intrinsic::riscv_vsse: 1465 case Intrinsic::riscv_vsse_mask: { 1466 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 1467 IntNo == Intrinsic::riscv_vsse_mask; 1468 bool IsStrided = 1469 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 1470 1471 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 1472 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1473 1474 unsigned CurOp = 2; 1475 SmallVector<SDValue, 8> Operands; 1476 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 1477 1478 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 1479 Operands); 1480 1481 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1482 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 1483 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 1484 MachineSDNode *Store = 1485 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1486 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1487 CurDAG->setNodeMemRefs(Store, {MemOp->getMemOperand()}); 1488 1489 ReplaceNode(Node, Store); 1490 return; 1491 } 1492 } 1493 break; 1494 } 1495 case ISD::BITCAST: { 1496 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 1497 // Just drop bitcasts between vectors if both are fixed or both are 1498 // scalable. 1499 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 1500 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 1501 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 1502 CurDAG->RemoveDeadNode(Node); 1503 return; 1504 } 1505 break; 1506 } 1507 case ISD::INSERT_SUBVECTOR: { 1508 SDValue V = Node->getOperand(0); 1509 SDValue SubV = Node->getOperand(1); 1510 SDLoc DL(SubV); 1511 auto Idx = Node->getConstantOperandVal(2); 1512 MVT SubVecVT = SubV.getSimpleValueType(); 1513 1514 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1515 MVT SubVecContainerVT = SubVecVT; 1516 // Establish the correct scalable-vector types for any fixed-length type. 
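    // Working in container types keeps the logic below uniform: fixed-length
    // subvectors are reasoned about through the scalable container they are
    // lowered into, so the same subreg decomposition handles both kinds.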
1517 if (SubVecVT.isFixedLengthVector()) 1518 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 1519 if (VT.isFixedLengthVector()) 1520 VT = TLI.getContainerForFixedLengthVector(VT); 1521 1522 const auto *TRI = Subtarget->getRegisterInfo(); 1523 unsigned SubRegIdx; 1524 std::tie(SubRegIdx, Idx) = 1525 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1526 VT, SubVecContainerVT, Idx, TRI); 1527 1528 // If the Idx hasn't been completely eliminated then this is a subvector 1529 // insert which doesn't naturally align to a vector register. These must 1530 // be handled using instructions to manipulate the vector registers. 1531 if (Idx != 0) 1532 break; 1533 1534 RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecContainerVT); 1535 bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 || 1536 SubVecLMUL == RISCVII::VLMUL::LMUL_F4 || 1537 SubVecLMUL == RISCVII::VLMUL::LMUL_F8; 1538 (void)IsSubVecPartReg; // Silence unused variable warning without asserts. 1539 assert((!IsSubVecPartReg || V.isUndef()) && 1540 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 1541 "the subvector is smaller than a full-sized register"); 1542 1543 // If we haven't set a SubRegIdx, then we must be going between 1544 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 1545 if (SubRegIdx == RISCV::NoSubRegister) { 1546 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(VT); 1547 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1548 InRegClassID && 1549 "Unexpected subvector extraction"); 1550 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1551 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1552 DL, VT, SubV, RC); 1553 ReplaceNode(Node, NewNode); 1554 return; 1555 } 1556 1557 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 1558 ReplaceNode(Node, Insert.getNode()); 1559 return; 1560 } 1561 case ISD::EXTRACT_SUBVECTOR: { 1562 SDValue V = Node->getOperand(0); 1563 auto Idx = Node->getConstantOperandVal(1); 1564 MVT InVT = V.getSimpleValueType(); 1565 SDLoc DL(V); 1566 1567 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 1568 MVT SubVecContainerVT = VT; 1569 // Establish the correct scalable-vector types for any fixed-length type. 1570 if (VT.isFixedLengthVector()) 1571 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 1572 if (InVT.isFixedLengthVector()) 1573 InVT = TLI.getContainerForFixedLengthVector(InVT); 1574 1575 const auto *TRI = Subtarget->getRegisterInfo(); 1576 unsigned SubRegIdx; 1577 std::tie(SubRegIdx, Idx) = 1578 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 1579 InVT, SubVecContainerVT, Idx, TRI); 1580 1581 // If the Idx hasn't been completely eliminated then this is a subvector 1582 // extract which doesn't naturally align to a vector register. These must 1583 // be handled using instructions to manipulate the vector registers. 1584 if (Idx != 0) 1585 break; 1586 1587 // If we haven't set a SubRegIdx, then we must be going between 1588 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
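    // In that case both types live in the same vector register class, so the
    // COPY_TO_REGCLASS emitted below is sufficient and no subregister
    // indexing is needed.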
1589 if (SubRegIdx == RISCV::NoSubRegister) { 1590 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 1591 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 1592 InRegClassID && 1593 "Unexpected subvector extraction"); 1594 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 1595 SDNode *NewNode = 1596 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 1597 ReplaceNode(Node, NewNode); 1598 return; 1599 } 1600 1601 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 1602 ReplaceNode(Node, Extract.getNode()); 1603 return; 1604 } 1605 case ISD::SPLAT_VECTOR: 1606 case RISCVISD::VMV_S_X_VL: 1607 case RISCVISD::VFMV_S_F_VL: 1608 case RISCVISD::VMV_V_X_VL: 1609 case RISCVISD::VFMV_V_F_VL: { 1610 // Try to match splat of a scalar load to a strided load with stride of x0. 1611 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 1612 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 1613 if (IsScalarMove && !Node->getOperand(0).isUndef()) 1614 break; 1615 SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); 1616 auto *Ld = dyn_cast<LoadSDNode>(Src); 1617 if (!Ld) 1618 break; 1619 EVT MemVT = Ld->getMemoryVT(); 1620 // The memory VT should be the same size as the element type. 1621 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 1622 break; 1623 if (!IsProfitableToFold(Src, Node, Node) || 1624 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 1625 break; 1626 1627 SDValue VL; 1628 if (Node->getOpcode() == ISD::SPLAT_VECTOR) 1629 VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); 1630 else if (IsScalarMove) { 1631 // We could deal with more VL if we update the VSETVLI insert pass to 1632 // avoid introducing more VSETVLI. 1633 if (!isOneConstant(Node->getOperand(2))) 1634 break; 1635 selectVLOp(Node->getOperand(2), VL); 1636 } else 1637 selectVLOp(Node->getOperand(1), VL); 1638 1639 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 1640 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 1641 1642 SDValue Operands[] = {Ld->getBasePtr(), 1643 CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW, 1644 Ld->getChain()}; 1645 1646 RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 1647 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 1648 /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false, 1649 Log2SEW, static_cast<unsigned>(LMUL)); 1650 MachineSDNode *Load = 1651 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 1652 1653 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 1654 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 1655 1656 ReplaceNode(Node, Load); 1657 return; 1658 } 1659 } 1660 1661 // Select the default instruction. 1662 SelectCode(Node); 1663 } 1664 1665 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 1666 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 1667 switch (ConstraintID) { 1668 case InlineAsm::Constraint_m: 1669 // We just support simple memory operands that have a single address 1670 // operand and need no special handling. 
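    // For example (illustrative), an "m" operand from something like
    //   asm volatile("sw %1, %0" : "=m"(Var) : "r"(Val));
    // reaches this point as a single address value and is forwarded as-is.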
1671 OutOps.push_back(Op); 1672 return false; 1673 case InlineAsm::Constraint_A: 1674 OutOps.push_back(Op); 1675 return false; 1676 default: 1677 break; 1678 } 1679 1680 return true; 1681 } 1682 1683 bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { 1684 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 1685 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1686 return true; 1687 } 1688 return false; 1689 } 1690 1691 bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { 1692 // If this is FrameIndex, select it directly. Otherwise just let it get 1693 // selected to a register independently. 1694 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) 1695 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 1696 else 1697 Base = Addr; 1698 return true; 1699 } 1700 1701 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 1702 SDValue &ShAmt) { 1703 // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift 1704 // amount. If there is an AND on the shift amount, we can bypass it if it 1705 // doesn't affect any of those bits. 1706 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 1707 const APInt &AndMask = N->getConstantOperandAPInt(1); 1708 1709 // Since the max shift amount is a power of 2 we can subtract 1 to make a 1710 // mask that covers the bits needed to represent all shift amounts. 1711 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 1712 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 1713 1714 if (ShMask.isSubsetOf(AndMask)) { 1715 ShAmt = N.getOperand(0); 1716 return true; 1717 } 1718 1719 // SimplifyDemandedBits may have optimized the mask so try restoring any 1720 // bits that are known zero. 1721 KnownBits Known = CurDAG->computeKnownBits(N->getOperand(0)); 1722 if (ShMask.isSubsetOf(AndMask | Known.Zero)) { 1723 ShAmt = N.getOperand(0); 1724 return true; 1725 } 1726 } 1727 1728 ShAmt = N; 1729 return true; 1730 } 1731 1732 bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { 1733 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 1734 cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { 1735 Val = N.getOperand(0); 1736 return true; 1737 } 1738 MVT VT = N.getSimpleValueType(); 1739 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { 1740 Val = N; 1741 return true; 1742 } 1743 1744 return false; 1745 } 1746 1747 bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { 1748 if (N.getOpcode() == ISD::AND) { 1749 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1750 if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { 1751 Val = N.getOperand(0); 1752 return true; 1753 } 1754 } 1755 MVT VT = N.getSimpleValueType(); 1756 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); 1757 if (CurDAG->MaskedValueIsZero(N, Mask)) { 1758 Val = N; 1759 return true; 1760 } 1761 1762 return false; 1763 } 1764 1765 // Return true if all users of this SDNode* only consume the lower \p Bits. 1766 // This can be used to form W instructions for add/sub/mul/shl even when the 1767 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 1768 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 1769 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 1770 // the add/sub/mul/shl to become non-W instructions. 
By checking the users we
1771 // may be able to use a W instruction and CSE with the other instruction if
1772 // this has happened. We could try to detect that the CSE opportunity exists
1773 // before doing this, but that would be more complicated.
1774 // TODO: Does this need to look through AND/OR/XOR to their users to find more
1775 // opportunities?
1776 bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
1777 assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
1778 Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
1779 Node->getOpcode() == ISD::SRL ||
1780 Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
1781 isa<ConstantSDNode>(Node)) &&
1782 "Unexpected opcode");
1783
1784 for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
1785 SDNode *User = *UI;
1786 // Users of this node should have already been instruction selected.
1787 if (!User->isMachineOpcode())
1788 return false;
1789
1790 // TODO: Add more opcodes?
1791 switch (User->getMachineOpcode()) {
1792 default:
1793 return false;
1794 case RISCV::ADDW:
1795 case RISCV::ADDIW:
1796 case RISCV::SUBW:
1797 case RISCV::MULW:
1798 case RISCV::SLLW:
1799 case RISCV::SLLIW:
1800 case RISCV::SRAW:
1801 case RISCV::SRAIW:
1802 case RISCV::SRLW:
1803 case RISCV::SRLIW:
1804 case RISCV::DIVW:
1805 case RISCV::DIVUW:
1806 case RISCV::REMW:
1807 case RISCV::REMUW:
1808 case RISCV::ROLW:
1809 case RISCV::RORW:
1810 case RISCV::RORIW:
1811 case RISCV::CLZW:
1812 case RISCV::CTZW:
1813 case RISCV::CPOPW:
1814 case RISCV::SLLI_UW:
1815 case RISCV::FCVT_H_W:
1816 case RISCV::FCVT_H_WU:
1817 case RISCV::FCVT_S_W:
1818 case RISCV::FCVT_S_WU:
1819 case RISCV::FCVT_D_W:
1820 case RISCV::FCVT_D_WU:
1821 if (Bits < 32)
1822 return false;
1823 break;
1824 case RISCV::SLLI:
1825 // SLLI only uses the lower (XLen - ShAmt) bits.
1826 if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
1827 return false;
1828 break;
1829 case RISCV::ANDI:
1830 if (Bits < (64 - countLeadingZeros(User->getConstantOperandVal(1))))
1831 return false;
1832 break;
1833 case RISCV::SEXT_B:
1834 if (Bits < 8)
1835 return false;
1836 break;
1837 case RISCV::SEXT_H:
1838 case RISCV::ZEXT_H_RV32:
1839 case RISCV::ZEXT_H_RV64:
1840 if (Bits < 16)
1841 return false;
1842 break;
1843 case RISCV::ADD_UW:
1844 case RISCV::SH1ADD_UW:
1845 case RISCV::SH2ADD_UW:
1846 case RISCV::SH3ADD_UW:
1847 // The first operand to add.uw/shXadd.uw is implicitly zero extended from
1848 // 32 bits.
1849 if (UI.getOperandNo() != 0 || Bits < 32)
1850 return false;
1851 break;
1852 case RISCV::SB:
1853 if (UI.getOperandNo() != 0 || Bits < 8)
1854 return false;
1855 break;
1856 case RISCV::SH:
1857 if (UI.getOperandNo() != 0 || Bits < 16)
1858 return false;
1859 break;
1860 case RISCV::SW:
1861 if (UI.getOperandNo() != 0 || Bits < 32)
1862 return false;
1863 break;
1864 }
1865 }
1866
1867 return true;
1868 }
1869
1870 // Select VL as a 5-bit immediate or a value that will become a register. This
1871 // allows us to choose between VSETIVLI and VSETVLI later.
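// For instance (illustrative encoding only): a constant VL of 4 can later be
// emitted as "vsetivli zero, 4, e32, m1, ta, mu", whereas a VL kept in a
// register falls back to "vsetvli zero, a0, e32, m1, ta, mu". The vtype fields
// shown here are placeholders and are not decided by this function.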
1872 bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { 1873 auto *C = dyn_cast<ConstantSDNode>(N); 1874 if (C && (isUInt<5>(C->getZExtValue()) || 1875 C->getSExtValue() == RISCV::VLMaxSentinel)) 1876 VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), 1877 N->getValueType(0)); 1878 else 1879 VL = N; 1880 1881 return true; 1882 } 1883 1884 bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { 1885 if (N.getOpcode() != ISD::SPLAT_VECTOR && 1886 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1887 N.getOpcode() != RISCVISD::VMV_V_X_VL) 1888 return false; 1889 SplatVal = N.getOperand(0); 1890 return true; 1891 } 1892 1893 using ValidateFn = bool (*)(int64_t); 1894 1895 static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, 1896 SelectionDAG &DAG, 1897 const RISCVSubtarget &Subtarget, 1898 ValidateFn ValidateImm) { 1899 if ((N.getOpcode() != ISD::SPLAT_VECTOR && 1900 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1901 N.getOpcode() != RISCVISD::VMV_V_X_VL) || 1902 !isa<ConstantSDNode>(N.getOperand(0))) 1903 return false; 1904 1905 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1906 1907 // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL 1908 // share semantics when the operand type is wider than the resulting vector 1909 // element type: an implicit truncation first takes place. Therefore, perform 1910 // a manual truncation/sign-extension in order to ignore any truncated bits 1911 // and catch any zero-extended immediate. 1912 // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first 1913 // sign-extending to (XLenVT -1). 1914 MVT XLenVT = Subtarget.getXLenVT(); 1915 assert(XLenVT == N.getOperand(0).getSimpleValueType() && 1916 "Unexpected splat operand type"); 1917 MVT EltVT = N.getSimpleValueType().getVectorElementType(); 1918 if (EltVT.bitsLT(XLenVT)) 1919 SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits()); 1920 1921 if (!ValidateImm(SplatImm)) 1922 return false; 1923 1924 SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT); 1925 return true; 1926 } 1927 1928 bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) { 1929 return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget, 1930 [](int64_t Imm) { return isInt<5>(Imm); }); 1931 } 1932 1933 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { 1934 return selectVSplatSimmHelper( 1935 N, SplatVal, *CurDAG, *Subtarget, 1936 [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }); 1937 } 1938 1939 bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, 1940 SDValue &SplatVal) { 1941 return selectVSplatSimmHelper( 1942 N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) { 1943 return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16); 1944 }); 1945 } 1946 1947 bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { 1948 if ((N.getOpcode() != ISD::SPLAT_VECTOR && 1949 N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && 1950 N.getOpcode() != RISCVISD::VMV_V_X_VL) || 1951 !isa<ConstantSDNode>(N.getOperand(0))) 1952 return false; 1953 1954 int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); 1955 1956 if (!isUInt<5>(SplatImm)) 1957 return false; 1958 1959 SplatVal = 1960 CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT()); 1961 1962 return true; 1963 } 1964 1965 bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, 1966 SDValue &Imm) { 1967 if (auto *C = dyn_cast<ConstantSDNode>(N)) { 
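    // Worked example (illustrative): with Width == 8, a constant 0xF0 sign
    // extends to -16, which is still a legal simm5, while 0x70 sign extends
    // to 112 and is rejected by the isInt<5> check below.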
1968 int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);
1969
1970 if (!isInt<5>(ImmVal))
1971 return false;
1972
1973 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), Subtarget->getXLenVT());
1974 return true;
1975 }
1976
1977 return false;
1978 }
1979
1980 // Merge an ADDI into the offset of a load/store instruction where possible.
1981 // (load (addi base, off1), off2) -> (load base, off1+off2)
1982 // (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
1983 // This is possible when off1+off2 fits in a 12-bit signed immediate.
1984 bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
1985 int OffsetOpIdx;
1986 int BaseOpIdx;
1987
1988 // Only attempt this optimisation for I-type loads and S-type stores.
1989 switch (N->getMachineOpcode()) {
1990 default:
1991 return false;
1992 case RISCV::LB:
1993 case RISCV::LH:
1994 case RISCV::LW:
1995 case RISCV::LBU:
1996 case RISCV::LHU:
1997 case RISCV::LWU:
1998 case RISCV::LD:
1999 case RISCV::FLH:
2000 case RISCV::FLW:
2001 case RISCV::FLD:
2002 BaseOpIdx = 0;
2003 OffsetOpIdx = 1;
2004 break;
2005 case RISCV::SB:
2006 case RISCV::SH:
2007 case RISCV::SW:
2008 case RISCV::SD:
2009 case RISCV::FSH:
2010 case RISCV::FSW:
2011 case RISCV::FSD:
2012 BaseOpIdx = 1;
2013 OffsetOpIdx = 2;
2014 break;
2015 }
2016
2017 if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
2018 return false;
2019
2020 SDValue Base = N->getOperand(BaseOpIdx);
2021
2022 // If the base is an ADDI, we can merge it into the load/store.
2023 if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
2024 return false;
2025
2026 SDValue ImmOperand = Base.getOperand(1);
2027 uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
2028
2029 if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
2030 int64_t Offset1 = Const->getSExtValue();
2031 int64_t CombinedOffset = Offset1 + Offset2;
2032 if (!isInt<12>(CombinedOffset))
2033 return false;
2034 ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
2035 ImmOperand.getValueType());
2036 } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
2037 // If the off1 in (addi base, off1) is a global variable's address (its
2038 // low part, really), then we can rely on the alignment of that variable
2039 // to provide a margin of safety before off1 can overflow the 12 bits.
2040 // Check if off2 falls within that margin; if so off1+off2 can't overflow.
2041 const DataLayout &DL = CurDAG->getDataLayout();
2042 Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
2043 if (Offset2 != 0 && Alignment <= Offset2)
2044 return false;
2045 int64_t Offset1 = GA->getOffset();
2046 int64_t CombinedOffset = Offset1 + Offset2;
2047 ImmOperand = CurDAG->getTargetGlobalAddress(
2048 GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
2049 CombinedOffset, GA->getTargetFlags());
2050 } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
2051 // Ditto.
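    // Concretely (illustrative numbers): with an 8-byte-aligned constant pool
    // entry, an off2 of 1..7 stays within the alignment margin and is folded,
    // while off2 >= 8 fails the check below and the ADDI is left alone.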
2052 Align Alignment = CP->getAlign();
2053 if (Offset2 != 0 && Alignment <= Offset2)
2054 return false;
2055 int64_t Offset1 = CP->getOffset();
2056 int64_t CombinedOffset = Offset1 + Offset2;
2057 ImmOperand = CurDAG->getTargetConstantPool(
2058 CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
2059 CombinedOffset, CP->getTargetFlags());
2060 } else {
2061 return false;
2062 }
2063
2064 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
2065 LLVM_DEBUG(Base->dump(CurDAG));
2066 LLVM_DEBUG(dbgs() << "\nN: ");
2067 LLVM_DEBUG(N->dump(CurDAG));
2068 LLVM_DEBUG(dbgs() << "\n");
2069
2070 // Modify the offset operand of the load/store.
2071 if (BaseOpIdx == 0) // Load
2072 CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
2073 N->getOperand(2));
2074 else // Store
2075 CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
2076 ImmOperand, N->getOperand(3));
2077
2078 return true;
2079 }
2080
2081 // Try to remove sext.w if the input is a W instruction or can be made into
2082 // a W instruction cheaply.
2083 bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
2084 // Look for the sext.w pattern, addiw rd, rs1, 0.
2085 if (N->getMachineOpcode() != RISCV::ADDIW ||
2086 !isNullConstant(N->getOperand(1)))
2087 return false;
2088
2089 SDValue N0 = N->getOperand(0);
2090 if (!N0.isMachineOpcode())
2091 return false;
2092
2093 switch (N0.getMachineOpcode()) {
2094 default:
2095 break;
2096 case RISCV::ADD:
2097 case RISCV::ADDI:
2098 case RISCV::SUB:
2099 case RISCV::MUL:
2100 case RISCV::SLLI: {
2101 // Convert sext.w+add/sub/mul to their W instructions. This will create
2102 // a new independent instruction, which improves latency.
2103 unsigned Opc;
2104 switch (N0.getMachineOpcode()) {
2105 default:
2106 llvm_unreachable("Unexpected opcode!");
2107 case RISCV::ADD: Opc = RISCV::ADDW; break;
2108 case RISCV::ADDI: Opc = RISCV::ADDIW; break;
2109 case RISCV::SUB: Opc = RISCV::SUBW; break;
2110 case RISCV::MUL: Opc = RISCV::MULW; break;
2111 case RISCV::SLLI: Opc = RISCV::SLLIW; break;
2112 }
2113
2114 SDValue N00 = N0.getOperand(0);
2115 SDValue N01 = N0.getOperand(1);
2116
2117 // Shift amount needs to be uimm5.
2118 if (N0.getMachineOpcode() == RISCV::SLLI &&
2119 !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
2120 break;
2121
2122 SDNode *Result =
2123 CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
2124 N00, N01);
2125 ReplaceUses(N, Result);
2126 return true;
2127 }
2128 case RISCV::ADDW:
2129 case RISCV::ADDIW:
2130 case RISCV::SUBW:
2131 case RISCV::MULW:
2132 case RISCV::SLLIW:
2133 // The result is already sign extended; just remove the sext.w.
2134 // NOTE: We only handle the nodes that are selected with hasAllWUsers.
2135 ReplaceUses(N, N0.getNode());
2136 return true;
2137 }
2138
2139 return false;
2140 }
2141
2142 // This pass converts a legalized DAG into a RISCV-specific DAG, ready
2143 // for instruction scheduling.
2144 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
2145 return new RISCVDAGToDAGISel(TM);
2146 }
2147