1 //===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the RISC-V target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "RISCVISelDAGToDAG.h" 14 #include "MCTargetDesc/RISCVBaseInfo.h" 15 #include "MCTargetDesc/RISCVMCTargetDesc.h" 16 #include "MCTargetDesc/RISCVMatInt.h" 17 #include "RISCVISelLowering.h" 18 #include "RISCVInstrInfo.h" 19 #include "RISCVSelectionDAGInfo.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/IR/IntrinsicsRISCV.h" 22 #include "llvm/Support/Alignment.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/MathExtras.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "riscv-isel" 30 #define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection" 31 32 static cl::opt<bool> UsePseudoMovImm( 33 "riscv-use-rematerializable-movimm", cl::Hidden, 34 cl::desc("Use a rematerializable pseudoinstruction for 2 instruction " 35 "constant materialization"), 36 cl::init(false)); 37 38 #define GET_DAGISEL_BODY RISCVDAGToDAGISel 39 #include "RISCVGenDAGISel.inc" 40 41 void RISCVDAGToDAGISel::PreprocessISelDAG() { 42 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 43 44 bool MadeChange = false; 45 while (Position != CurDAG->allnodes_begin()) { 46 SDNode *N = &*--Position; 47 if (N->use_empty()) 48 continue; 49 50 SDValue Result; 51 switch (N->getOpcode()) { 52 case ISD::SPLAT_VECTOR: { 53 // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point 54 // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. 
55 MVT VT = N->getSimpleValueType(0); 56 unsigned Opc = 57 VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; 58 SDLoc DL(N); 59 SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 60 SDValue Src = N->getOperand(0); 61 if (VT.isInteger()) 62 Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(), 63 N->getOperand(0)); 64 Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL); 65 break; 66 } 67 case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: { 68 // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector 69 // load. Done after lowering and combining so that we have a chance to 70 // optimize this to VMV_V_X_VL when the upper bits aren't needed. 71 assert(N->getNumOperands() == 4 && "Unexpected number of operands"); 72 MVT VT = N->getSimpleValueType(0); 73 SDValue Passthru = N->getOperand(0); 74 SDValue Lo = N->getOperand(1); 75 SDValue Hi = N->getOperand(2); 76 SDValue VL = N->getOperand(3); 77 assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && 78 Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && 79 "Unexpected VTs!"); 80 MachineFunction &MF = CurDAG->getMachineFunction(); 81 SDLoc DL(N); 82 83 // Create temporary stack for each expanding node. 
84 SDValue StackSlot = 85 CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8)); 86 int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex(); 87 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI); 88 89 SDValue Chain = CurDAG->getEntryNode(); 90 Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8)); 91 92 SDValue OffsetSlot = 93 CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL); 94 Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), 95 Align(8)); 96 97 Chain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 98 99 SDVTList VTs = CurDAG->getVTList({VT, MVT::Other}); 100 SDValue IntID = 101 CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); 102 SDValue Ops[] = {Chain, 103 IntID, 104 Passthru, 105 StackSlot, 106 CurDAG->getRegister(RISCV::X0, MVT::i64), 107 VL}; 108 109 Result = CurDAG->getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, 110 MVT::i64, MPI, Align(8), 111 MachineMemOperand::MOLoad); 112 break; 113 } 114 case ISD::FP_EXTEND: { 115 // We only have vector patterns for riscv_fpextend_vl in isel. 
116 SDLoc DL(N); 117 MVT VT = N->getSimpleValueType(0); 118 if (!VT.isVector()) 119 break; 120 SDValue VLMAX = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); 121 SDValue TrueMask = CurDAG->getNode( 122 RISCVISD::VMSET_VL, DL, VT.changeVectorElementType(MVT::i1), VLMAX); 123 Result = CurDAG->getNode(RISCVISD::FP_EXTEND_VL, DL, VT, N->getOperand(0), 124 TrueMask, VLMAX); 125 break; 126 } 127 } 128 129 if (Result) { 130 LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: "); 131 LLVM_DEBUG(N->dump(CurDAG)); 132 LLVM_DEBUG(dbgs() << "\nNew: "); 133 LLVM_DEBUG(Result->dump(CurDAG)); 134 LLVM_DEBUG(dbgs() << "\n"); 135 136 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 137 MadeChange = true; 138 } 139 } 140 141 if (MadeChange) 142 CurDAG->RemoveDeadNodes(); 143 } 144 145 void RISCVDAGToDAGISel::PostprocessISelDAG() { 146 HandleSDNode Dummy(CurDAG->getRoot()); 147 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 148 149 bool MadeChange = false; 150 while (Position != CurDAG->allnodes_begin()) { 151 SDNode *N = &*--Position; 152 // Skip dead nodes and any non-machine opcodes. 153 if (N->use_empty() || !N->isMachineOpcode()) 154 continue; 155 156 MadeChange |= doPeepholeSExtW(N); 157 158 // FIXME: This is here only because the VMerge transform doesn't 159 // know how to handle masked true inputs. Once that has been moved 160 // to post-ISEL, this can be deleted as well. 161 MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N)); 162 } 163 164 CurDAG->setRoot(Dummy.getValue()); 165 166 // After we're done with everything else, convert IMPLICIT_DEF 167 // passthru operands to NoRegister. This is required to workaround 168 // an optimization deficiency in MachineCSE. This really should 169 // be merged back into each of the patterns (i.e. there's no good 170 // reason not to go directly to NoReg), but is being done this way 171 // to allow easy backporting. 
172 MadeChange |= doPeepholeNoRegPassThru(); 173 174 if (MadeChange) 175 CurDAG->RemoveDeadNodes(); 176 } 177 178 static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 179 RISCVMatInt::InstSeq &Seq) { 180 SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); 181 for (const RISCVMatInt::Inst &Inst : Seq) { 182 SDValue SDImm = CurDAG->getSignedTargetConstant(Inst.getImm(), DL, VT); 183 SDNode *Result = nullptr; 184 switch (Inst.getOpndKind()) { 185 case RISCVMatInt::Imm: 186 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm); 187 break; 188 case RISCVMatInt::RegX0: 189 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, 190 CurDAG->getRegister(RISCV::X0, VT)); 191 break; 192 case RISCVMatInt::RegReg: 193 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SrcReg); 194 break; 195 case RISCVMatInt::RegImm: 196 Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SrcReg, SDImm); 197 break; 198 } 199 200 // Only the first instruction has X0 as its source. 201 SrcReg = SDValue(Result, 0); 202 } 203 204 return SrcReg; 205 } 206 207 static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, 208 int64_t Imm, const RISCVSubtarget &Subtarget) { 209 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget); 210 211 // Use a rematerializable pseudo instruction for short sequences if enabled. 212 if (Seq.size() == 2 && UsePseudoMovImm) 213 return SDValue( 214 CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT, 215 CurDAG->getSignedTargetConstant(Imm, DL, VT)), 216 0); 217 218 // See if we can create this constant as (ADD (SLLI X, C), X) where X is at 219 // worst an LUI+ADDIW. This will require an extra register, but avoids a 220 // constant pool. 221 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where 222 // low and high 32 bits are the same and bit 31 and 63 are set. 
223 if (Seq.size() > 3) { 224 unsigned ShiftAmt, AddOpc; 225 RISCVMatInt::InstSeq SeqLo = 226 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc); 227 if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) { 228 SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo); 229 230 SDValue SLLI = SDValue( 231 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo, 232 CurDAG->getTargetConstant(ShiftAmt, DL, VT)), 233 0); 234 return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0); 235 } 236 } 237 238 // Otherwise, use the original sequence. 239 return selectImmSeq(CurDAG, DL, VT, Seq); 240 } 241 242 void RISCVDAGToDAGISel::addVectorLoadStoreOperands( 243 SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp, 244 bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands, 245 bool IsLoad, MVT *IndexVT) { 246 SDValue Chain = Node->getOperand(0); 247 248 Operands.push_back(Node->getOperand(CurOp++)); // Base pointer. 249 250 if (IsStridedOrIndexed) { 251 Operands.push_back(Node->getOperand(CurOp++)); // Index. 252 if (IndexVT) 253 *IndexVT = Operands.back()->getSimpleValueType(0); 254 } 255 256 if (IsMasked) { 257 SDValue Mask = Node->getOperand(CurOp++); 258 Operands.push_back(Mask); 259 } 260 SDValue VL; 261 selectVLOp(Node->getOperand(CurOp++), VL); 262 Operands.push_back(VL); 263 264 MVT XLenVT = Subtarget->getXLenVT(); 265 SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 266 Operands.push_back(SEWOp); 267 268 // At the IR layer, all the masked load intrinsics have policy operands, 269 // none of the others do. All have passthru operands. For our pseudos, 270 // all loads have policy operands. 271 if (IsLoad) { 272 uint64_t Policy = RISCVVType::MASK_AGNOSTIC; 273 if (IsMasked) 274 Policy = Node->getConstantOperandVal(CurOp++); 275 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 276 Operands.push_back(PolicyOp); 277 } 278 279 Operands.push_back(Chain); // Chain. 
280 } 281 282 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, unsigned NF, bool IsMasked, 283 bool IsStrided) { 284 SDLoc DL(Node); 285 MVT VT = Node->getSimpleValueType(0); 286 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 287 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 288 289 unsigned CurOp = 2; 290 SmallVector<SDValue, 8> Operands; 291 292 Operands.push_back(Node->getOperand(CurOp++)); 293 294 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 295 Operands, /*IsLoad=*/true); 296 297 const RISCV::VLSEGPseudo *P = 298 RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, 299 static_cast<unsigned>(LMUL)); 300 MachineSDNode *Load = 301 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 302 303 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 304 305 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 306 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); 307 CurDAG->RemoveDeadNode(Node); 308 } 309 310 void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, unsigned NF, 311 bool IsMasked) { 312 SDLoc DL(Node); 313 MVT VT = Node->getSimpleValueType(0); 314 MVT XLenVT = Subtarget->getXLenVT(); 315 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 316 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 317 318 unsigned CurOp = 2; 319 SmallVector<SDValue, 7> Operands; 320 321 Operands.push_back(Node->getOperand(CurOp++)); 322 323 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 324 /*IsStridedOrIndexed*/ false, Operands, 325 /*IsLoad=*/true); 326 327 const RISCV::VLSEGPseudo *P = 328 RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, 329 Log2SEW, static_cast<unsigned>(LMUL)); 330 MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, 331 XLenVT, MVT::Other, Operands); 332 333 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 334 335 
ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); // Result 336 ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); // VL 337 ReplaceUses(SDValue(Node, 2), SDValue(Load, 2)); // Chain 338 CurDAG->RemoveDeadNode(Node); 339 } 340 341 void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, unsigned NF, bool IsMasked, 342 bool IsOrdered) { 343 SDLoc DL(Node); 344 MVT VT = Node->getSimpleValueType(0); 345 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 346 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 347 348 unsigned CurOp = 2; 349 SmallVector<SDValue, 8> Operands; 350 351 Operands.push_back(Node->getOperand(CurOp++)); 352 353 MVT IndexVT; 354 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 355 /*IsStridedOrIndexed*/ true, Operands, 356 /*IsLoad=*/true, &IndexVT); 357 358 #ifndef NDEBUG 359 // Number of element = RVVBitsPerBlock * LMUL / SEW 360 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW; 361 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL); 362 if (DecodedLMUL.second) 363 ContainedTyNumElts /= DecodedLMUL.first; 364 else 365 ContainedTyNumElts *= DecodedLMUL.first; 366 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() && 367 "Element count mismatch"); 368 #endif 369 370 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 371 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 372 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 373 report_fatal_error("The V extension does not support EEW=64 for index " 374 "values when XLEN=32"); 375 } 376 const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( 377 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 378 static_cast<unsigned>(IndexLMUL)); 379 MachineSDNode *Load = 380 CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); 381 382 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 383 384 ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); 385 
ReplaceUses(SDValue(Node, 1), SDValue(Load, 1)); 386 CurDAG->RemoveDeadNode(Node); 387 } 388 389 void RISCVDAGToDAGISel::selectVSSEG(SDNode *Node, unsigned NF, bool IsMasked, 390 bool IsStrided) { 391 SDLoc DL(Node); 392 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 393 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 394 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 395 396 unsigned CurOp = 2; 397 SmallVector<SDValue, 8> Operands; 398 399 Operands.push_back(Node->getOperand(CurOp++)); 400 401 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 402 Operands); 403 404 const RISCV::VSSEGPseudo *P = RISCV::getVSSEGPseudo( 405 NF, IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 406 MachineSDNode *Store = 407 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 408 409 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()}); 410 411 ReplaceNode(Node, Store); 412 } 413 414 void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, unsigned NF, bool IsMasked, 415 bool IsOrdered) { 416 SDLoc DL(Node); 417 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 418 unsigned Log2SEW = Node->getConstantOperandVal(Node->getNumOperands() - 1); 419 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 420 421 unsigned CurOp = 2; 422 SmallVector<SDValue, 8> Operands; 423 424 Operands.push_back(Node->getOperand(CurOp++)); 425 426 MVT IndexVT; 427 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 428 /*IsStridedOrIndexed*/ true, Operands, 429 /*IsLoad=*/false, &IndexVT); 430 431 #ifndef NDEBUG 432 // Number of element = RVVBitsPerBlock * LMUL / SEW 433 unsigned ContainedTyNumElts = RISCV::RVVBitsPerBlock >> Log2SEW; 434 auto DecodedLMUL = RISCVVType::decodeVLMUL(LMUL); 435 if (DecodedLMUL.second) 436 ContainedTyNumElts /= DecodedLMUL.first; 437 else 438 ContainedTyNumElts *= DecodedLMUL.first; 439 assert(ContainedTyNumElts == IndexVT.getVectorMinNumElements() 
&& 440 "Element count mismatch"); 441 #endif 442 443 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 444 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 445 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 446 report_fatal_error("The V extension does not support EEW=64 for index " 447 "values when XLEN=32"); 448 } 449 const RISCV::VSXSEGPseudo *P = RISCV::getVSXSEGPseudo( 450 NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 451 static_cast<unsigned>(IndexLMUL)); 452 MachineSDNode *Store = 453 CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), Operands); 454 455 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()}); 456 457 ReplaceNode(Node, Store); 458 } 459 460 void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) { 461 if (!Subtarget->hasVInstructions()) 462 return; 463 464 assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode"); 465 466 SDLoc DL(Node); 467 MVT XLenVT = Subtarget->getXLenVT(); 468 469 unsigned IntNo = Node->getConstantOperandVal(0); 470 471 assert((IntNo == Intrinsic::riscv_vsetvli || 472 IntNo == Intrinsic::riscv_vsetvlimax) && 473 "Unexpected vsetvli intrinsic"); 474 475 bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax; 476 unsigned Offset = (VLMax ? 
1 : 2); 477 478 assert(Node->getNumOperands() == Offset + 2 && 479 "Unexpected number of operands"); 480 481 unsigned SEW = 482 RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7); 483 RISCVVType::VLMUL VLMul = static_cast<RISCVVType::VLMUL>( 484 Node->getConstantOperandVal(Offset + 1) & 0x7); 485 486 unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true, 487 /*MaskAgnostic*/ true); 488 SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT); 489 490 SDValue VLOperand; 491 unsigned Opcode = RISCV::PseudoVSETVLI; 492 if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { 493 if (auto VLEN = Subtarget->getRealVLen()) 494 if (*VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue()) 495 VLMax = true; 496 } 497 if (VLMax || isAllOnesConstant(Node->getOperand(1))) { 498 VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT); 499 Opcode = RISCV::PseudoVSETVLIX0; 500 } else { 501 VLOperand = Node->getOperand(1); 502 503 if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) { 504 uint64_t AVL = C->getZExtValue(); 505 if (isUInt<5>(AVL)) { 506 SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT); 507 ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, 508 XLenVT, VLImm, VTypeIOp)); 509 return; 510 } 511 } 512 } 513 514 ReplaceNode(Node, 515 CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp)); 516 } 517 518 bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { 519 MVT VT = Node->getSimpleValueType(0); 520 unsigned Opcode = Node->getOpcode(); 521 assert((Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR) && 522 "Unexpected opcode"); 523 SDLoc DL(Node); 524 525 // For operations of the form (x << C1) op C2, check if we can use 526 // ANDI/ORI/XORI by transforming it into (x op (C2>>C1)) << C1. 
527 SDValue N0 = Node->getOperand(0); 528 SDValue N1 = Node->getOperand(1); 529 530 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); 531 if (!Cst) 532 return false; 533 534 int64_t Val = Cst->getSExtValue(); 535 536 // Check if immediate can already use ANDI/ORI/XORI. 537 if (isInt<12>(Val)) 538 return false; 539 540 SDValue Shift = N0; 541 542 // If Val is simm32 and we have a sext_inreg from i32, then the binop 543 // produces at least 33 sign bits. We can peek through the sext_inreg and use 544 // a SLLIW at the end. 545 bool SignExt = false; 546 if (isInt<32>(Val) && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && 547 N0.hasOneUse() && cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i32) { 548 SignExt = true; 549 Shift = N0.getOperand(0); 550 } 551 552 if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) 553 return false; 554 555 ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 556 if (!ShlCst) 557 return false; 558 559 uint64_t ShAmt = ShlCst->getZExtValue(); 560 561 // Make sure that we don't change the operation by removing bits. 562 // This only matters for OR and XOR, AND is unaffected. 563 uint64_t RemovedBitsMask = maskTrailingOnes<uint64_t>(ShAmt); 564 if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) 565 return false; 566 567 int64_t ShiftedVal = Val >> ShAmt; 568 if (!isInt<12>(ShiftedVal)) 569 return false; 570 571 // If we peeked through a sext_inreg, make sure the shift is valid for SLLIW. 572 if (SignExt && ShAmt >= 32) 573 return false; 574 575 // Ok, we can reorder to get a smaller immediate. 576 unsigned BinOpc; 577 switch (Opcode) { 578 default: llvm_unreachable("Unexpected opcode"); 579 case ISD::AND: BinOpc = RISCV::ANDI; break; 580 case ISD::OR: BinOpc = RISCV::ORI; break; 581 case ISD::XOR: BinOpc = RISCV::XORI; break; 582 } 583 584 unsigned ShOpc = SignExt ? 
RISCV::SLLIW : RISCV::SLLI; 585 586 SDNode *BinOp = CurDAG->getMachineNode( 587 BinOpc, DL, VT, Shift.getOperand(0), 588 CurDAG->getSignedTargetConstant(ShiftedVal, DL, VT)); 589 SDNode *SLLI = 590 CurDAG->getMachineNode(ShOpc, DL, VT, SDValue(BinOp, 0), 591 CurDAG->getTargetConstant(ShAmt, DL, VT)); 592 ReplaceNode(Node, SLLI); 593 return true; 594 } 595 596 bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { 597 unsigned Opc; 598 599 if (Subtarget->hasVendorXTHeadBb()) 600 Opc = RISCV::TH_EXT; 601 else if (Subtarget->hasVendorXAndesPerf()) 602 Opc = RISCV::NDS_BFOS; 603 else if (Subtarget->hasVendorXqcibm()) 604 Opc = RISCV::QC_EXT; 605 else 606 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment. 607 return false; 608 609 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 610 if (!N1C) 611 return false; 612 613 SDValue N0 = Node->getOperand(0); 614 if (!N0.hasOneUse()) 615 return false; 616 617 auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, 618 const SDLoc &DL, MVT VT) { 619 if (Opc == RISCV::QC_EXT) { 620 // QC.EXT X, width, shamt 621 // shamt is the same as Lsb 622 // width is the number of bits to extract from the Lsb 623 Msb = Msb - Lsb + 1; 624 } 625 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0), 626 CurDAG->getTargetConstant(Msb, DL, VT), 627 CurDAG->getTargetConstant(Lsb, DL, VT)); 628 }; 629 630 SDLoc DL(Node); 631 MVT VT = Node->getSimpleValueType(0); 632 const unsigned RightShAmt = N1C->getZExtValue(); 633 634 // Transform (sra (shl X, C1) C2) with C1 < C2 635 // -> (SignedBitfieldExtract X, msb, lsb) 636 if (N0.getOpcode() == ISD::SHL) { 637 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 638 if (!N01C) 639 return false; 640 641 const unsigned LeftShAmt = N01C->getZExtValue(); 642 // Make sure that this is a bitfield extraction (i.e., the shift-right 643 // amount can not be less than the left-shift). 
644 if (LeftShAmt > RightShAmt) 645 return false; 646 647 const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; 648 const unsigned Msb = MsbPlusOne - 1; 649 const unsigned Lsb = RightShAmt - LeftShAmt; 650 651 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT); 652 ReplaceNode(Node, Sbe); 653 return true; 654 } 655 656 // Transform (sra (sext_inreg X, _), C) -> 657 // (SignedBitfieldExtract X, msb, lsb) 658 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { 659 unsigned ExtSize = 660 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 661 662 // ExtSize of 32 should use sraiw via tablegen pattern. 663 if (ExtSize == 32) 664 return false; 665 666 const unsigned Msb = ExtSize - 1; 667 // If the shift-right amount is greater than Msb, it means that extracts 668 // the X[Msb] bit and sign-extend it. 669 const unsigned Lsb = RightShAmt > Msb ? Msb : RightShAmt; 670 671 SDNode *Sbe = BitfieldExtract(N0, Msb, Lsb, DL, VT); 672 ReplaceNode(Node, Sbe); 673 return true; 674 } 675 676 return false; 677 } 678 679 bool RISCVDAGToDAGISel::trySignedBitfieldInsertInMask(SDNode *Node) { 680 // Supported only in Xqcibm for now. 681 if (!Subtarget->hasVendorXqcibm()) 682 return false; 683 684 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 685 if (!N1C) 686 return false; 687 688 int32_t C1 = N1C->getSExtValue(); 689 if (!isShiftedMask_32(C1) || isInt<12>(C1)) 690 return false; 691 692 // INSBI will clobber the input register in N0. Bail out if we need a copy to 693 // preserve this value. 694 SDValue N0 = Node->getOperand(0); 695 if (!N0.hasOneUse()) 696 return false; 697 698 // If C1 is a shifted mask (but can't be formed as an ORI), 699 // use a bitfield insert of -1. 
700 // Transform (or x, C1) 701 // -> (qc.insbi x, -1, width, shift) 702 const unsigned Leading = llvm::countl_zero((uint32_t)C1); 703 const unsigned Trailing = llvm::countr_zero((uint32_t)C1); 704 const unsigned Width = 32 - Leading - Trailing; 705 706 // If Zbs is enabled and it is a single bit set we can use BSETI which 707 // can be compressed to C_BSETI when Xqcibm in enabled. 708 if (Width == 1 && Subtarget->hasStdExtZbs()) 709 return false; 710 711 SDLoc DL(Node); 712 MVT VT = Node->getSimpleValueType(0); 713 714 SDValue Ops[] = {N0, CurDAG->getSignedTargetConstant(-1, DL, VT), 715 CurDAG->getTargetConstant(Width, DL, VT), 716 CurDAG->getTargetConstant(Trailing, DL, VT)}; 717 SDNode *BitIns = CurDAG->getMachineNode(RISCV::QC_INSBI, DL, VT, Ops); 718 ReplaceNode(Node, BitIns); 719 return true; 720 } 721 722 bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) { 723 // Only supported with XAndesPerf at the moment. 724 if (!Subtarget->hasVendorXAndesPerf()) 725 return false; 726 727 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 728 if (!N1C) 729 return false; 730 731 SDValue N0 = Node->getOperand(0); 732 if (!N0.hasOneUse()) 733 return false; 734 735 auto BitfieldInsert = [&](SDValue N0, unsigned Msb, unsigned Lsb, 736 const SDLoc &DL, MVT VT) { 737 unsigned Opc = RISCV::NDS_BFOS; 738 // If the Lsb is equal to the Msb, then the Lsb should be 0. 
739 if (Lsb == Msb) 740 Lsb = 0; 741 return CurDAG->getMachineNode(Opc, DL, VT, N0.getOperand(0), 742 CurDAG->getTargetConstant(Lsb, DL, VT), 743 CurDAG->getTargetConstant(Msb, DL, VT)); 744 }; 745 746 SDLoc DL(Node); 747 MVT VT = Node->getSimpleValueType(0); 748 const unsigned RightShAmt = N1C->getZExtValue(); 749 750 // Transform (sra (shl X, C1) C2) with C1 > C2 751 // -> (NDS.BFOS X, lsb, msb) 752 if (N0.getOpcode() == ISD::SHL) { 753 auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 754 if (!N01C) 755 return false; 756 757 const unsigned LeftShAmt = N01C->getZExtValue(); 758 // Make sure that this is a bitfield insertion (i.e., the shift-right 759 // amount should be less than the left-shift). 760 if (LeftShAmt <= RightShAmt) 761 return false; 762 763 const unsigned MsbPlusOne = VT.getSizeInBits() - RightShAmt; 764 const unsigned Msb = MsbPlusOne - 1; 765 const unsigned Lsb = LeftShAmt - RightShAmt; 766 767 SDNode *Sbi = BitfieldInsert(N0, Msb, Lsb, DL, VT); 768 ReplaceNode(Node, Sbi); 769 return true; 770 } 771 772 return false; 773 } 774 775 bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, 776 const SDLoc &DL, MVT VT, 777 SDValue X, unsigned Msb, 778 unsigned Lsb) { 779 unsigned Opc; 780 781 if (Subtarget->hasVendorXTHeadBb()) { 782 Opc = RISCV::TH_EXTU; 783 } else if (Subtarget->hasVendorXAndesPerf()) { 784 Opc = RISCV::NDS_BFOZ; 785 } else if (Subtarget->hasVendorXqcibm()) { 786 Opc = RISCV::QC_EXTU; 787 // QC.EXTU X, width, shamt 788 // shamt is the same as Lsb 789 // width is the number of bits to extract from the Lsb 790 Msb = Msb - Lsb + 1; 791 } else { 792 // Only supported with XTHeadBb/XAndesPerf/Xqcibm at the moment. 
793 return false; 794 } 795 796 SDNode *Ube = CurDAG->getMachineNode(Opc, DL, VT, X, 797 CurDAG->getTargetConstant(Msb, DL, VT), 798 CurDAG->getTargetConstant(Lsb, DL, VT)); 799 ReplaceNode(Node, Ube); 800 return true; 801 } 802 803 bool RISCVDAGToDAGISel::tryUnsignedBitfieldInsertInZero(SDNode *Node, 804 const SDLoc &DL, MVT VT, 805 SDValue X, unsigned Msb, 806 unsigned Lsb) { 807 // Only supported with XAndesPerf at the moment. 808 if (!Subtarget->hasVendorXAndesPerf()) 809 return false; 810 811 unsigned Opc = RISCV::NDS_BFOZ; 812 813 // If the Lsb is equal to the Msb, then the Lsb should be 0. 814 if (Lsb == Msb) 815 Lsb = 0; 816 SDNode *Ubi = CurDAG->getMachineNode(Opc, DL, VT, X, 817 CurDAG->getTargetConstant(Lsb, DL, VT), 818 CurDAG->getTargetConstant(Msb, DL, VT)); 819 ReplaceNode(Node, Ubi); 820 return true; 821 } 822 823 bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { 824 // Target does not support indexed loads. 825 if (!Subtarget->hasVendorXTHeadMemIdx()) 826 return false; 827 828 LoadSDNode *Ld = cast<LoadSDNode>(Node); 829 ISD::MemIndexedMode AM = Ld->getAddressingMode(); 830 if (AM == ISD::UNINDEXED) 831 return false; 832 833 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset()); 834 if (!C) 835 return false; 836 837 EVT LoadVT = Ld->getMemoryVT(); 838 assert((AM == ISD::PRE_INC || AM == ISD::POST_INC) && 839 "Unexpected addressing mode"); 840 bool IsPre = AM == ISD::PRE_INC; 841 bool IsPost = AM == ISD::POST_INC; 842 int64_t Offset = C->getSExtValue(); 843 844 // The constants that can be encoded in the THeadMemIdx instructions 845 // are of the form (sign_extend(imm5) << imm2). 846 unsigned Shift; 847 for (Shift = 0; Shift < 4; Shift++) 848 if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) 849 break; 850 851 // Constant cannot be encoded. 
852 if (Shift == 4) 853 return false; 854 855 bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD); 856 unsigned Opcode; 857 if (LoadVT == MVT::i8 && IsPre) 858 Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB; 859 else if (LoadVT == MVT::i8 && IsPost) 860 Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA; 861 else if (LoadVT == MVT::i16 && IsPre) 862 Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB; 863 else if (LoadVT == MVT::i16 && IsPost) 864 Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA; 865 else if (LoadVT == MVT::i32 && IsPre) 866 Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB; 867 else if (LoadVT == MVT::i32 && IsPost) 868 Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA; 869 else if (LoadVT == MVT::i64 && IsPre) 870 Opcode = RISCV::TH_LDIB; 871 else if (LoadVT == MVT::i64 && IsPost) 872 Opcode = RISCV::TH_LDIA; 873 else 874 return false; 875 876 EVT Ty = Ld->getOffset().getValueType(); 877 SDValue Ops[] = { 878 Ld->getBasePtr(), 879 CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(Node), Ty), 880 CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty), Ld->getChain()}; 881 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0), 882 Ld->getValueType(1), MVT::Other, Ops); 883 884 MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand(); 885 CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp}); 886 887 ReplaceNode(Node, New); 888 889 return true; 890 } 891 892 void RISCVDAGToDAGISel::selectSF_VC_X_SE(SDNode *Node) { 893 if (!Subtarget->hasVInstructions()) 894 return; 895 896 assert(Node->getOpcode() == ISD::INTRINSIC_VOID && "Unexpected opcode"); 897 898 SDLoc DL(Node); 899 unsigned IntNo = Node->getConstantOperandVal(1); 900 901 assert((IntNo == Intrinsic::riscv_sf_vc_x_se || 902 IntNo == Intrinsic::riscv_sf_vc_i_se) && 903 "Unexpected vsetvli intrinsic"); 904 905 // imm, imm, imm, simm5/scalar, sew, log2lmul, vl 906 unsigned Log2SEW = Log2_32(Node->getConstantOperandVal(6)); 907 SDValue SEWOp = 908 
CurDAG->getTargetConstant(Log2SEW, DL, Subtarget->getXLenVT()); 909 SmallVector<SDValue, 8> Operands = {Node->getOperand(2), Node->getOperand(3), 910 Node->getOperand(4), Node->getOperand(5), 911 Node->getOperand(8), SEWOp, 912 Node->getOperand(0)}; 913 914 unsigned Opcode; 915 auto *LMulSDNode = cast<ConstantSDNode>(Node->getOperand(7)); 916 switch (LMulSDNode->getSExtValue()) { 917 case 5: 918 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF8 919 : RISCV::PseudoSF_VC_I_SE_MF8; 920 break; 921 case 6: 922 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF4 923 : RISCV::PseudoSF_VC_I_SE_MF4; 924 break; 925 case 7: 926 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_MF2 927 : RISCV::PseudoSF_VC_I_SE_MF2; 928 break; 929 case 0: 930 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M1 931 : RISCV::PseudoSF_VC_I_SE_M1; 932 break; 933 case 1: 934 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M2 935 : RISCV::PseudoSF_VC_I_SE_M2; 936 break; 937 case 2: 938 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? RISCV::PseudoSF_VC_X_SE_M4 939 : RISCV::PseudoSF_VC_I_SE_M4; 940 break; 941 case 3: 942 Opcode = IntNo == Intrinsic::riscv_sf_vc_x_se ? 
RISCV::PseudoSF_VC_X_SE_M8 943 : RISCV::PseudoSF_VC_I_SE_M8; 944 break; 945 } 946 947 ReplaceNode(Node, CurDAG->getMachineNode( 948 Opcode, DL, Node->getSimpleValueType(0), Operands)); 949 } 950 951 static unsigned getSegInstNF(unsigned Intrinsic) { 952 #define INST_NF_CASE(NAME, NF) \ 953 case Intrinsic::riscv_##NAME##NF: \ 954 return NF; 955 #define INST_NF_CASE_MASK(NAME, NF) \ 956 case Intrinsic::riscv_##NAME##NF##_mask: \ 957 return NF; 958 #define INST_NF_CASE_FF(NAME, NF) \ 959 case Intrinsic::riscv_##NAME##NF##ff: \ 960 return NF; 961 #define INST_NF_CASE_FF_MASK(NAME, NF) \ 962 case Intrinsic::riscv_##NAME##NF##ff_mask: \ 963 return NF; 964 #define INST_ALL_NF_CASE_BASE(MACRO_NAME, NAME) \ 965 MACRO_NAME(NAME, 2) \ 966 MACRO_NAME(NAME, 3) \ 967 MACRO_NAME(NAME, 4) \ 968 MACRO_NAME(NAME, 5) \ 969 MACRO_NAME(NAME, 6) \ 970 MACRO_NAME(NAME, 7) \ 971 MACRO_NAME(NAME, 8) 972 #define INST_ALL_NF_CASE(NAME) \ 973 INST_ALL_NF_CASE_BASE(INST_NF_CASE, NAME) \ 974 INST_ALL_NF_CASE_BASE(INST_NF_CASE_MASK, NAME) 975 #define INST_ALL_NF_CASE_WITH_FF(NAME) \ 976 INST_ALL_NF_CASE(NAME) \ 977 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF, NAME) \ 978 INST_ALL_NF_CASE_BASE(INST_NF_CASE_FF_MASK, NAME) 979 switch (Intrinsic) { 980 default: 981 llvm_unreachable("Unexpected segment load/store intrinsic"); 982 INST_ALL_NF_CASE_WITH_FF(vlseg) 983 INST_ALL_NF_CASE(vlsseg) 984 INST_ALL_NF_CASE(vloxseg) 985 INST_ALL_NF_CASE(vluxseg) 986 INST_ALL_NF_CASE(vsseg) 987 INST_ALL_NF_CASE(vssseg) 988 INST_ALL_NF_CASE(vsoxseg) 989 INST_ALL_NF_CASE(vsuxseg) 990 } 991 } 992 993 void RISCVDAGToDAGISel::Select(SDNode *Node) { 994 // If we have a custom node, we have already selected. 995 if (Node->isMachineOpcode()) { 996 LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); 997 Node->setNodeId(-1); 998 return; 999 } 1000 1001 // Instruction Selection not handled by the auto-generated tablegen selection 1002 // should be handled here. 
1003 unsigned Opcode = Node->getOpcode(); 1004 MVT XLenVT = Subtarget->getXLenVT(); 1005 SDLoc DL(Node); 1006 MVT VT = Node->getSimpleValueType(0); 1007 1008 bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); 1009 1010 switch (Opcode) { 1011 case ISD::Constant: { 1012 assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT"); 1013 auto *ConstNode = cast<ConstantSDNode>(Node); 1014 if (ConstNode->isZero()) { 1015 SDValue New = 1016 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT); 1017 ReplaceNode(Node, New.getNode()); 1018 return; 1019 } 1020 int64_t Imm = ConstNode->getSExtValue(); 1021 // If only the lower 8 bits are used, try to convert this to a simm6 by 1022 // sign-extending bit 7. This is neutral without the C extension, and 1023 // allows C.LI to be used if C is present. 1024 if (isUInt<8>(Imm) && isInt<6>(SignExtend64<8>(Imm)) && hasAllBUsers(Node)) 1025 Imm = SignExtend64<8>(Imm); 1026 // If the upper XLen-16 bits are not used, try to convert this to a simm12 1027 // by sign extending bit 15. 1028 if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && 1029 hasAllHUsers(Node)) 1030 Imm = SignExtend64<16>(Imm); 1031 // If the upper 32-bits are not used try to convert this into a simm32 by 1032 // sign extending bit 32. 1033 if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) 1034 Imm = SignExtend64<32>(Imm); 1035 1036 ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode()); 1037 return; 1038 } 1039 case ISD::ConstantFP: { 1040 const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF(); 1041 1042 bool Is64Bit = Subtarget->is64Bit(); 1043 bool HasZdinx = Subtarget->hasStdExtZdinx(); 1044 1045 bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; 1046 SDValue Imm; 1047 // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will 1048 // create an integer immediate. 
1049 if (APF.isPosZero() || NegZeroF64) { 1050 if (VT == MVT::f64 && HasZdinx && !Is64Bit) 1051 Imm = CurDAG->getRegister(RISCV::X0_Pair, MVT::f64); 1052 else 1053 Imm = CurDAG->getRegister(RISCV::X0, XLenVT); 1054 } else { 1055 Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(), 1056 *Subtarget); 1057 } 1058 1059 unsigned Opc; 1060 switch (VT.SimpleTy) { 1061 default: 1062 llvm_unreachable("Unexpected size"); 1063 case MVT::bf16: 1064 assert(Subtarget->hasStdExtZfbfmin()); 1065 Opc = RISCV::FMV_H_X; 1066 break; 1067 case MVT::f16: 1068 Opc = Subtarget->hasStdExtZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X; 1069 break; 1070 case MVT::f32: 1071 Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X; 1072 break; 1073 case MVT::f64: 1074 // For RV32, we can't move from a GPR, we need to convert instead. This 1075 // should only happen for +0.0 and -0.0. 1076 assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant"); 1077 if (HasZdinx) 1078 Opc = RISCV::COPY; 1079 else 1080 Opc = Is64Bit ? RISCV::FMV_D_X : RISCV::FCVT_D_W; 1081 break; 1082 } 1083 1084 SDNode *Res; 1085 if (VT.SimpleTy == MVT::f16 && Opc == RISCV::COPY) { 1086 Res = 1087 CurDAG->getTargetExtractSubreg(RISCV::sub_16, DL, VT, Imm).getNode(); 1088 } else if (VT.SimpleTy == MVT::f32 && Opc == RISCV::COPY) { 1089 Res = 1090 CurDAG->getTargetExtractSubreg(RISCV::sub_32, DL, VT, Imm).getNode(); 1091 } else if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W) 1092 Res = CurDAG->getMachineNode( 1093 Opc, DL, VT, Imm, 1094 CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT)); 1095 else 1096 Res = CurDAG->getMachineNode(Opc, DL, VT, Imm); 1097 1098 // For f64 -0.0, we need to insert a fneg.d idiom. 1099 if (NegZeroF64) { 1100 Opc = RISCV::FSGNJN_D; 1101 if (HasZdinx) 1102 Opc = Is64Bit ? 
RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X; 1103 Res = 1104 CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0)); 1105 } 1106 1107 ReplaceNode(Node, Res); 1108 return; 1109 } 1110 case RISCVISD::BuildGPRPair: 1111 case RISCVISD::BuildPairF64: { 1112 if (Opcode == RISCVISD::BuildPairF64 && !Subtarget->hasStdExtZdinx()) 1113 break; 1114 1115 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::BuildGPRPair) && 1116 "BuildPairF64 only handled here on rv32i_zdinx"); 1117 1118 SDValue Ops[] = { 1119 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), 1120 Node->getOperand(0), 1121 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), 1122 Node->getOperand(1), 1123 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)}; 1124 1125 SDNode *N = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); 1126 ReplaceNode(Node, N); 1127 return; 1128 } 1129 case RISCVISD::SplitGPRPair: 1130 case RISCVISD::SplitF64: { 1131 if (Subtarget->hasStdExtZdinx() || Opcode != RISCVISD::SplitF64) { 1132 assert((!Subtarget->is64Bit() || Opcode == RISCVISD::SplitGPRPair) && 1133 "SplitF64 only handled here on rv32i_zdinx"); 1134 1135 if (!SDValue(Node, 0).use_empty()) { 1136 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, 1137 Node->getValueType(0), 1138 Node->getOperand(0)); 1139 ReplaceUses(SDValue(Node, 0), Lo); 1140 } 1141 1142 if (!SDValue(Node, 1).use_empty()) { 1143 SDValue Hi = CurDAG->getTargetExtractSubreg( 1144 RISCV::sub_gpr_odd, DL, Node->getValueType(1), Node->getOperand(0)); 1145 ReplaceUses(SDValue(Node, 1), Hi); 1146 } 1147 1148 CurDAG->RemoveDeadNode(Node); 1149 return; 1150 } 1151 1152 assert(Opcode != RISCVISD::SplitGPRPair && 1153 "SplitGPRPair should already be handled"); 1154 1155 if (!Subtarget->hasStdExtZfa()) 1156 break; 1157 assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() && 1158 "Unexpected subtarget"); 1159 1160 // With Zfa, lower to fmv.x.w and fmvh.x.d. 
1161 if (!SDValue(Node, 0).use_empty()) { 1162 SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT, 1163 Node->getOperand(0)); 1164 ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0)); 1165 } 1166 if (!SDValue(Node, 1).use_empty()) { 1167 SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT, 1168 Node->getOperand(0)); 1169 ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0)); 1170 } 1171 1172 CurDAG->RemoveDeadNode(Node); 1173 return; 1174 } 1175 case ISD::SHL: { 1176 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1177 if (!N1C) 1178 break; 1179 SDValue N0 = Node->getOperand(0); 1180 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || 1181 !isa<ConstantSDNode>(N0.getOperand(1))) 1182 break; 1183 unsigned ShAmt = N1C->getZExtValue(); 1184 uint64_t Mask = N0.getConstantOperandVal(1); 1185 1186 if (isShiftedMask_64(Mask)) { 1187 unsigned XLen = Subtarget->getXLen(); 1188 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 1189 unsigned TrailingZeros = llvm::countr_zero(Mask); 1190 if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) { 1191 // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) 1192 // where C2 has 32 leading zeros and C3 trailing zeros. 1193 SDNode *SRLIW = CurDAG->getMachineNode( 1194 RISCV::SRLIW, DL, VT, N0->getOperand(0), 1195 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 1196 SDNode *SLLI = CurDAG->getMachineNode( 1197 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1198 CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); 1199 ReplaceNode(Node, SLLI); 1200 return; 1201 } 1202 if (TrailingZeros == 0 && LeadingZeros > ShAmt && 1203 XLen - LeadingZeros > 11 && LeadingZeros != 32) { 1204 // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C) 1205 // where C2 has C4 leading zeros and no trailing zeros. 1206 // This is profitable if the "and" was to be lowered to 1207 // (srli (slli X, C4), C4) and not (andi X, C2). 
1208 // For "LeadingZeros == 32": 1209 // - with Zba it's just (slli.uw X, C) 1210 // - without Zba a tablegen pattern applies the very same 1211 // transform as we would have done here 1212 SDNode *SLLI = CurDAG->getMachineNode( 1213 RISCV::SLLI, DL, VT, N0->getOperand(0), 1214 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1215 SDNode *SRLI = CurDAG->getMachineNode( 1216 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1217 CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT)); 1218 ReplaceNode(Node, SRLI); 1219 return; 1220 } 1221 } 1222 break; 1223 } 1224 case ISD::SRL: { 1225 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1226 if (!N1C) 1227 break; 1228 SDValue N0 = Node->getOperand(0); 1229 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1230 break; 1231 unsigned ShAmt = N1C->getZExtValue(); 1232 uint64_t Mask = N0.getConstantOperandVal(1); 1233 1234 // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has 1235 // 32 leading zeros and C3 trailing zeros. 1236 if (isShiftedMask_64(Mask) && N0.hasOneUse()) { 1237 unsigned XLen = Subtarget->getXLen(); 1238 unsigned LeadingZeros = XLen - llvm::bit_width(Mask); 1239 unsigned TrailingZeros = llvm::countr_zero(Mask); 1240 if (LeadingZeros == 32 && TrailingZeros > ShAmt) { 1241 SDNode *SRLIW = CurDAG->getMachineNode( 1242 RISCV::SRLIW, DL, VT, N0->getOperand(0), 1243 CurDAG->getTargetConstant(TrailingZeros, DL, VT)); 1244 SDNode *SLLI = CurDAG->getMachineNode( 1245 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1246 CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); 1247 ReplaceNode(Node, SLLI); 1248 return; 1249 } 1250 } 1251 1252 // Optimize (srl (and X, C2), C) -> 1253 // (srli (slli X, (XLen-C3), (XLen-C3) + C) 1254 // Where C2 is a mask with C3 trailing ones. 1255 // Taking into account that the C2 may have had lower bits unset by 1256 // SimplifyDemandedBits. This avoids materializing the C2 immediate. 
1257 // This pattern occurs when type legalizing right shifts for types with 1258 // less than XLen bits. 1259 Mask |= maskTrailingOnes<uint64_t>(ShAmt); 1260 if (!isMask_64(Mask)) 1261 break; 1262 unsigned TrailingOnes = llvm::countr_one(Mask); 1263 if (ShAmt >= TrailingOnes) 1264 break; 1265 // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64. 1266 if (TrailingOnes == 32) { 1267 SDNode *SRLI = CurDAG->getMachineNode( 1268 Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT, 1269 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1270 ReplaceNode(Node, SRLI); 1271 return; 1272 } 1273 1274 // Only do the remaining transforms if the AND has one use. 1275 if (!N0.hasOneUse()) 1276 break; 1277 1278 // If C2 is (1 << ShAmt) use bexti or th.tst if possible. 1279 if (HasBitTest && ShAmt + 1 == TrailingOnes) { 1280 SDNode *BEXTI = CurDAG->getMachineNode( 1281 Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT, 1282 N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT)); 1283 ReplaceNode(Node, BEXTI); 1284 return; 1285 } 1286 1287 const unsigned Msb = TrailingOnes - 1; 1288 const unsigned Lsb = ShAmt; 1289 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0->getOperand(0), Msb, Lsb)) 1290 return; 1291 1292 unsigned LShAmt = Subtarget->getXLen() - TrailingOnes; 1293 SDNode *SLLI = 1294 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1295 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1296 SDNode *SRLI = CurDAG->getMachineNode( 1297 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1298 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1299 ReplaceNode(Node, SRLI); 1300 return; 1301 } 1302 case ISD::SRA: { 1303 if (trySignedBitfieldExtract(Node)) 1304 return; 1305 1306 if (trySignedBitfieldInsertInSign(Node)) 1307 return; 1308 1309 // Optimize (sra (sext_inreg X, i16), C) -> 1310 // (srai (slli X, (XLen-16), (XLen-16) + C) 1311 // And (sra (sext_inreg X, i8), C) -> 1312 // (srai (slli X, (XLen-8), (XLen-8) + C) 1313 // 
This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal. 1314 // This transform matches the code we get without Zbb. The shifts are more 1315 // compressible, and this can help expose CSE opportunities in the sdiv by 1316 // constant optimization. 1317 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1318 if (!N1C) 1319 break; 1320 SDValue N0 = Node->getOperand(0); 1321 if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse()) 1322 break; 1323 unsigned ShAmt = N1C->getZExtValue(); 1324 unsigned ExtSize = 1325 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); 1326 // ExtSize of 32 should use sraiw via tablegen pattern. 1327 if (ExtSize >= 32 || ShAmt >= ExtSize) 1328 break; 1329 unsigned LShAmt = Subtarget->getXLen() - ExtSize; 1330 SDNode *SLLI = 1331 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0), 1332 CurDAG->getTargetConstant(LShAmt, DL, VT)); 1333 SDNode *SRAI = CurDAG->getMachineNode( 1334 RISCV::SRAI, DL, VT, SDValue(SLLI, 0), 1335 CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT)); 1336 ReplaceNode(Node, SRAI); 1337 return; 1338 } 1339 case ISD::OR: { 1340 if (trySignedBitfieldInsertInMask(Node)) 1341 return; 1342 1343 if (tryShrinkShlLogicImm(Node)) 1344 return; 1345 1346 break; 1347 } 1348 case ISD::XOR: 1349 if (tryShrinkShlLogicImm(Node)) 1350 return; 1351 1352 break; 1353 case ISD::AND: { 1354 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1355 if (!N1C) 1356 break; 1357 1358 SDValue N0 = Node->getOperand(0); 1359 1360 bool LeftShift = N0.getOpcode() == ISD::SHL; 1361 if (LeftShift || N0.getOpcode() == ISD::SRL) { 1362 auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 1363 if (!C) 1364 break; 1365 unsigned C2 = C->getZExtValue(); 1366 unsigned XLen = Subtarget->getXLen(); 1367 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 1368 1369 // Keep track of whether this is a c.andi. If we can't use c.andi, the 1370 // shift pair might offer more compression opportunities. 
1371 // TODO: We could check for C extension here, but we don't have many lit 1372 // tests with the C extension enabled so not checking gets better 1373 // coverage. 1374 // TODO: What if ANDI faster than shift? 1375 bool IsCANDI = isInt<6>(N1C->getSExtValue()); 1376 1377 uint64_t C1 = N1C->getZExtValue(); 1378 1379 // Clear irrelevant bits in the mask. 1380 if (LeftShift) 1381 C1 &= maskTrailingZeros<uint64_t>(C2); 1382 else 1383 C1 &= maskTrailingOnes<uint64_t>(XLen - C2); 1384 1385 // Some transforms should only be done if the shift has a single use or 1386 // the AND would become (srli (slli X, 32), 32) 1387 bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF); 1388 1389 SDValue X = N0.getOperand(0); 1390 1391 // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask 1392 // with c3 leading zeros. 1393 if (!LeftShift && isMask_64(C1)) { 1394 unsigned Leading = XLen - llvm::bit_width(C1); 1395 if (C2 < Leading) { 1396 // If the number of leading zeros is C2+32 this can be SRLIW. 1397 if (C2 + 32 == Leading) { 1398 SDNode *SRLIW = CurDAG->getMachineNode( 1399 RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); 1400 ReplaceNode(Node, SRLIW); 1401 return; 1402 } 1403 1404 // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) 1405 // if c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1. 1406 // 1407 // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type 1408 // legalized and goes through DAG combine. 
1409 if (C2 >= 32 && (Leading - C2) == 1 && N0.hasOneUse() && 1410 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1411 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { 1412 SDNode *SRAIW = 1413 CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), 1414 CurDAG->getTargetConstant(31, DL, VT)); 1415 SDNode *SRLIW = CurDAG->getMachineNode( 1416 RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), 1417 CurDAG->getTargetConstant(Leading - 32, DL, VT)); 1418 ReplaceNode(Node, SRLIW); 1419 return; 1420 } 1421 1422 // Try to use an unsigned bitfield extract (e.g., th.extu) if 1423 // available. 1424 // Transform (and (srl x, C2), C1) 1425 // -> (<bfextract> x, msb, lsb) 1426 // 1427 // Make sure to keep this below the SRLIW cases, as we always want to 1428 // prefer the more common instruction. 1429 const unsigned Msb = llvm::bit_width(C1) + C2 - 1; 1430 const unsigned Lsb = C2; 1431 if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) 1432 return; 1433 1434 // (srli (slli x, c3-c2), c3). 1435 // Skip if we could use (zext.w (sraiw X, C2)). 1436 bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && 1437 X.getOpcode() == ISD::SIGN_EXTEND_INREG && 1438 cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; 1439 // Also Skip if we can use bexti or th.tst. 1440 Skip |= HasBitTest && Leading == XLen - 1; 1441 if (OneUseOrZExtW && !Skip) { 1442 SDNode *SLLI = CurDAG->getMachineNode( 1443 RISCV::SLLI, DL, VT, X, 1444 CurDAG->getTargetConstant(Leading - C2, DL, VT)); 1445 SDNode *SRLI = CurDAG->getMachineNode( 1446 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1447 CurDAG->getTargetConstant(Leading, DL, VT)); 1448 ReplaceNode(Node, SRLI); 1449 return; 1450 } 1451 } 1452 } 1453 1454 // Turn (and (shl x, c2), c1) -> (srli (slli c2+c3), c3) if c1 is a mask 1455 // shifted by c2 bits with c3 leading zeros. 
1456 if (LeftShift && isShiftedMask_64(C1)) { 1457 unsigned Leading = XLen - llvm::bit_width(C1); 1458 1459 if (C2 + Leading < XLen && 1460 C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + Leading)) << C2)) { 1461 // Use slli.uw when possible. 1462 if ((XLen - (C2 + Leading)) == 32 && Subtarget->hasStdExtZba()) { 1463 SDNode *SLLI_UW = 1464 CurDAG->getMachineNode(RISCV::SLLI_UW, DL, VT, X, 1465 CurDAG->getTargetConstant(C2, DL, VT)); 1466 ReplaceNode(Node, SLLI_UW); 1467 return; 1468 } 1469 1470 // Try to use an unsigned bitfield insert (e.g., nds.bfoz) if 1471 // available. 1472 // Transform (and (shl x, c2), c1) 1473 // -> (<bfinsert> x, msb, lsb) 1474 // e.g. 1475 // (and (shl x, 12), 0x00fff000) 1476 // If XLen = 32 and C2 = 12, then 1477 // Msb = 32 - 8 - 1 = 23 and Lsb = 12 1478 const unsigned Msb = XLen - Leading - 1; 1479 const unsigned Lsb = C2; 1480 if (tryUnsignedBitfieldInsertInZero(Node, DL, VT, X, Msb, Lsb)) 1481 return; 1482 1483 // (srli (slli c2+c3), c3) 1484 if (OneUseOrZExtW && !IsCANDI) { 1485 SDNode *SLLI = CurDAG->getMachineNode( 1486 RISCV::SLLI, DL, VT, X, 1487 CurDAG->getTargetConstant(C2 + Leading, DL, VT)); 1488 SDNode *SRLI = CurDAG->getMachineNode( 1489 RISCV::SRLI, DL, VT, SDValue(SLLI, 0), 1490 CurDAG->getTargetConstant(Leading, DL, VT)); 1491 ReplaceNode(Node, SRLI); 1492 return; 1493 } 1494 } 1495 } 1496 1497 // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a 1498 // shifted mask with c2 leading zeros and c3 trailing zeros. 1499 if (!LeftShift && isShiftedMask_64(C1)) { 1500 unsigned Leading = XLen - llvm::bit_width(C1); 1501 unsigned Trailing = llvm::countr_zero(C1); 1502 if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW && 1503 !IsCANDI) { 1504 unsigned SrliOpc = RISCV::SRLI; 1505 // If the input is zexti32 we should use SRLIW. 
1506 if (X.getOpcode() == ISD::AND && 1507 isa<ConstantSDNode>(X.getOperand(1)) && 1508 X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { 1509 SrliOpc = RISCV::SRLIW; 1510 X = X.getOperand(0); 1511 } 1512 SDNode *SRLI = CurDAG->getMachineNode( 1513 SrliOpc, DL, VT, X, 1514 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1515 SDNode *SLLI = CurDAG->getMachineNode( 1516 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1517 CurDAG->getTargetConstant(Trailing, DL, VT)); 1518 ReplaceNode(Node, SLLI); 1519 return; 1520 } 1521 // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. 1522 if (Leading > 32 && (Leading - 32) == C2 && C2 + Trailing < 32 && 1523 OneUseOrZExtW && !IsCANDI) { 1524 SDNode *SRLIW = CurDAG->getMachineNode( 1525 RISCV::SRLIW, DL, VT, X, 1526 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1527 SDNode *SLLI = CurDAG->getMachineNode( 1528 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1529 CurDAG->getTargetConstant(Trailing, DL, VT)); 1530 ReplaceNode(Node, SLLI); 1531 return; 1532 } 1533 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI. 1534 if (Trailing > 0 && Leading + Trailing == 32 && C2 + Trailing < XLen && 1535 OneUseOrZExtW && Subtarget->hasStdExtZba()) { 1536 SDNode *SRLI = CurDAG->getMachineNode( 1537 RISCV::SRLI, DL, VT, X, 1538 CurDAG->getTargetConstant(C2 + Trailing, DL, VT)); 1539 SDNode *SLLI_UW = CurDAG->getMachineNode( 1540 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0), 1541 CurDAG->getTargetConstant(Trailing, DL, VT)); 1542 ReplaceNode(Node, SLLI_UW); 1543 return; 1544 } 1545 } 1546 1547 // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a 1548 // shifted mask with no leading zeros and c3 trailing zeros. 
1549 if (LeftShift && isShiftedMask_64(C1)) { 1550 unsigned Leading = XLen - llvm::bit_width(C1); 1551 unsigned Trailing = llvm::countr_zero(C1); 1552 if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) { 1553 SDNode *SRLI = CurDAG->getMachineNode( 1554 RISCV::SRLI, DL, VT, X, 1555 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1556 SDNode *SLLI = CurDAG->getMachineNode( 1557 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1558 CurDAG->getTargetConstant(Trailing, DL, VT)); 1559 ReplaceNode(Node, SLLI); 1560 return; 1561 } 1562 // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. 1563 if (C2 < Trailing && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { 1564 SDNode *SRLIW = CurDAG->getMachineNode( 1565 RISCV::SRLIW, DL, VT, X, 1566 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1567 SDNode *SLLI = CurDAG->getMachineNode( 1568 RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), 1569 CurDAG->getTargetConstant(Trailing, DL, VT)); 1570 ReplaceNode(Node, SLLI); 1571 return; 1572 } 1573 1574 // If we have 32 bits in the mask, we can use SLLI_UW instead of SLLI. 1575 if (C2 < Trailing && Leading + Trailing == 32 && OneUseOrZExtW && 1576 Subtarget->hasStdExtZba()) { 1577 SDNode *SRLI = CurDAG->getMachineNode( 1578 RISCV::SRLI, DL, VT, X, 1579 CurDAG->getTargetConstant(Trailing - C2, DL, VT)); 1580 SDNode *SLLI_UW = CurDAG->getMachineNode( 1581 RISCV::SLLI_UW, DL, VT, SDValue(SRLI, 0), 1582 CurDAG->getTargetConstant(Trailing, DL, VT)); 1583 ReplaceNode(Node, SLLI_UW); 1584 return; 1585 } 1586 } 1587 } 1588 1589 const uint64_t C1 = N1C->getZExtValue(); 1590 1591 if (N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && 1592 N0.hasOneUse()) { 1593 unsigned C2 = N0.getConstantOperandVal(1); 1594 unsigned XLen = Subtarget->getXLen(); 1595 assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); 1596 1597 SDValue X = N0.getOperand(0); 1598 1599 // Prefer SRAIW + ANDI when possible. 
1600 bool Skip = C2 > 32 && isInt<12>(N1C->getSExtValue()) && 1601 X.getOpcode() == ISD::SHL && 1602 isa<ConstantSDNode>(X.getOperand(1)) && 1603 X.getConstantOperandVal(1) == 32; 1604 // Turn (and (sra x, c2), c1) -> (srli (srai x, c2-c3), c3) if c1 is a 1605 // mask with c3 leading zeros and c2 is larger than c3. 1606 if (isMask_64(C1) && !Skip) { 1607 unsigned Leading = XLen - llvm::bit_width(C1); 1608 if (C2 > Leading) { 1609 SDNode *SRAI = CurDAG->getMachineNode( 1610 RISCV::SRAI, DL, VT, X, 1611 CurDAG->getTargetConstant(C2 - Leading, DL, VT)); 1612 SDNode *SRLI = CurDAG->getMachineNode( 1613 RISCV::SRLI, DL, VT, SDValue(SRAI, 0), 1614 CurDAG->getTargetConstant(Leading, DL, VT)); 1615 ReplaceNode(Node, SRLI); 1616 return; 1617 } 1618 } 1619 1620 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 1621 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can 1622 // use (slli (srli (srai y, c2 - c3), c3 + c4), c4). 1623 if (isShiftedMask_64(C1) && !Skip) { 1624 unsigned Leading = XLen - llvm::bit_width(C1); 1625 unsigned Trailing = llvm::countr_zero(C1); 1626 if (C2 > Leading && Leading > 0 && Trailing > 0) { 1627 SDNode *SRAI = CurDAG->getMachineNode( 1628 RISCV::SRAI, DL, VT, N0.getOperand(0), 1629 CurDAG->getTargetConstant(C2 - Leading, DL, VT)); 1630 SDNode *SRLI = CurDAG->getMachineNode( 1631 RISCV::SRLI, DL, VT, SDValue(SRAI, 0), 1632 CurDAG->getTargetConstant(Leading + Trailing, DL, VT)); 1633 SDNode *SLLI = CurDAG->getMachineNode( 1634 RISCV::SLLI, DL, VT, SDValue(SRLI, 0), 1635 CurDAG->getTargetConstant(Trailing, DL, VT)); 1636 ReplaceNode(Node, SLLI); 1637 return; 1638 } 1639 } 1640 } 1641 1642 // If C1 masks off the upper bits only (but can't be formed as an 1643 // ANDI), use an unsigned bitfield extract (e.g., th.extu), if 1644 // available. 
1645 // Transform (and x, C1) 1646 // -> (<bfextract> x, msb, lsb) 1647 if (isMask_64(C1) && !isInt<12>(N1C->getSExtValue())) { 1648 const unsigned Msb = llvm::bit_width(C1) - 1; 1649 if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) 1650 return; 1651 } 1652 1653 if (tryShrinkShlLogicImm(Node)) 1654 return; 1655 1656 break; 1657 } 1658 case ISD::MUL: { 1659 // Special case for calculating (mul (and X, C2), C1) where the full product 1660 // fits in XLen bits. We can shift X left by the number of leading zeros in 1661 // C2 and shift C1 left by XLen-lzcnt(C2). This will ensure the final 1662 // product has XLen trailing zeros, putting it in the output of MULHU. This 1663 // can avoid materializing a constant in a register for C2. 1664 1665 // RHS should be a constant. 1666 auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); 1667 if (!N1C || !N1C->hasOneUse()) 1668 break; 1669 1670 // LHS should be an AND with constant. 1671 SDValue N0 = Node->getOperand(0); 1672 if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) 1673 break; 1674 1675 uint64_t C2 = N0.getConstantOperandVal(1); 1676 1677 // Constant should be a mask. 1678 if (!isMask_64(C2)) 1679 break; 1680 1681 // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has 1682 // multiple users or the constant is a simm12. This prevents inserting a 1683 // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely 1684 // make it more costly to materialize. Otherwise, using a SLLI might allow 1685 // it to be compressed. 1686 bool IsANDIOrZExt = 1687 isInt<12>(C2) || 1688 (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()); 1689 // With XTHeadBb, we can use TH.EXTU. 1690 IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb(); 1691 if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse())) 1692 break; 1693 // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or 1694 // the constant is a simm32. 
1695 bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba(); 1696 // With XTHeadBb, we can use TH.EXTU. 1697 IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb(); 1698 if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse())) 1699 break; 1700 1701 // We need to shift left the AND input and C1 by a total of XLen bits. 1702 1703 // How far left do we need to shift the AND input? 1704 unsigned XLen = Subtarget->getXLen(); 1705 unsigned LeadingZeros = XLen - llvm::bit_width(C2); 1706 1707 // The constant gets shifted by the remaining amount unless that would 1708 // shift bits out. 1709 uint64_t C1 = N1C->getZExtValue(); 1710 unsigned ConstantShift = XLen - LeadingZeros; 1711 if (ConstantShift > (XLen - llvm::bit_width(C1))) 1712 break; 1713 1714 uint64_t ShiftedC1 = C1 << ConstantShift; 1715 // If this RV32, we need to sign extend the constant. 1716 if (XLen == 32) 1717 ShiftedC1 = SignExtend64<32>(ShiftedC1); 1718 1719 // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). 
1720 SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode(); 1721 SDNode *SLLI = 1722 CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), 1723 CurDAG->getTargetConstant(LeadingZeros, DL, VT)); 1724 SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, 1725 SDValue(SLLI, 0), SDValue(Imm, 0)); 1726 ReplaceNode(Node, MULHU); 1727 return; 1728 } 1729 case ISD::LOAD: { 1730 if (tryIndexedLoad(Node)) 1731 return; 1732 1733 if (Subtarget->hasVendorXCVmem() && !Subtarget->is64Bit()) { 1734 // We match post-incrementing load here 1735 LoadSDNode *Load = cast<LoadSDNode>(Node); 1736 if (Load->getAddressingMode() != ISD::POST_INC) 1737 break; 1738 1739 SDValue Chain = Node->getOperand(0); 1740 SDValue Base = Node->getOperand(1); 1741 SDValue Offset = Node->getOperand(2); 1742 1743 bool Simm12 = false; 1744 bool SignExtend = Load->getExtensionType() == ISD::SEXTLOAD; 1745 1746 if (auto ConstantOffset = dyn_cast<ConstantSDNode>(Offset)) { 1747 int ConstantVal = ConstantOffset->getSExtValue(); 1748 Simm12 = isInt<12>(ConstantVal); 1749 if (Simm12) 1750 Offset = CurDAG->getTargetConstant(ConstantVal, SDLoc(Offset), 1751 Offset.getValueType()); 1752 } 1753 1754 unsigned Opcode = 0; 1755 switch (Load->getMemoryVT().getSimpleVT().SimpleTy) { 1756 case MVT::i8: 1757 if (Simm12 && SignExtend) 1758 Opcode = RISCV::CV_LB_ri_inc; 1759 else if (Simm12 && !SignExtend) 1760 Opcode = RISCV::CV_LBU_ri_inc; 1761 else if (!Simm12 && SignExtend) 1762 Opcode = RISCV::CV_LB_rr_inc; 1763 else 1764 Opcode = RISCV::CV_LBU_rr_inc; 1765 break; 1766 case MVT::i16: 1767 if (Simm12 && SignExtend) 1768 Opcode = RISCV::CV_LH_ri_inc; 1769 else if (Simm12 && !SignExtend) 1770 Opcode = RISCV::CV_LHU_ri_inc; 1771 else if (!Simm12 && SignExtend) 1772 Opcode = RISCV::CV_LH_rr_inc; 1773 else 1774 Opcode = RISCV::CV_LHU_rr_inc; 1775 break; 1776 case MVT::i32: 1777 if (Simm12) 1778 Opcode = RISCV::CV_LW_ri_inc; 1779 else 1780 Opcode = RISCV::CV_LW_rr_inc; 1781 break; 1782 default: 
1783 break; 1784 } 1785 if (!Opcode) 1786 break; 1787 1788 ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, XLenVT, XLenVT, 1789 Chain.getSimpleValueType(), Base, 1790 Offset, Chain)); 1791 return; 1792 } 1793 break; 1794 } 1795 case RISCVISD::LD_RV32: { 1796 assert(Subtarget->hasStdExtZilsd() && "LD_RV32 is only used with Zilsd"); 1797 1798 SDValue Base, Offset; 1799 SDValue Chain = Node->getOperand(0); 1800 SDValue Addr = Node->getOperand(1); 1801 SelectAddrRegImm(Addr, Base, Offset); 1802 1803 SDValue Ops[] = {Base, Offset, Chain}; 1804 MachineSDNode *New = CurDAG->getMachineNode( 1805 RISCV::LD_RV32, DL, {MVT::Untyped, MVT::Other}, Ops); 1806 SDValue Lo = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_even, DL, 1807 MVT::i32, SDValue(New, 0)); 1808 SDValue Hi = CurDAG->getTargetExtractSubreg(RISCV::sub_gpr_odd, DL, 1809 MVT::i32, SDValue(New, 0)); 1810 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()}); 1811 ReplaceUses(SDValue(Node, 0), Lo); 1812 ReplaceUses(SDValue(Node, 1), Hi); 1813 ReplaceUses(SDValue(Node, 2), SDValue(New, 1)); 1814 CurDAG->RemoveDeadNode(Node); 1815 return; 1816 } 1817 case RISCVISD::SD_RV32: { 1818 SDValue Base, Offset; 1819 SDValue Chain = Node->getOperand(0); 1820 SDValue Addr = Node->getOperand(3); 1821 SelectAddrRegImm(Addr, Base, Offset); 1822 1823 SDValue Lo = Node->getOperand(1); 1824 SDValue Hi = Node->getOperand(2); 1825 1826 SDValue RegPair; 1827 // Peephole to use X0_Pair for storing zero. 
1828 if (isNullConstant(Lo) && isNullConstant(Hi)) { 1829 RegPair = CurDAG->getRegister(RISCV::X0_Pair, MVT::Untyped); 1830 } else { 1831 SDValue Ops[] = { 1832 CurDAG->getTargetConstant(RISCV::GPRPairRegClassID, DL, MVT::i32), Lo, 1833 CurDAG->getTargetConstant(RISCV::sub_gpr_even, DL, MVT::i32), Hi, 1834 CurDAG->getTargetConstant(RISCV::sub_gpr_odd, DL, MVT::i32)}; 1835 1836 RegPair = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, 1837 MVT::Untyped, Ops), 1838 0); 1839 } 1840 1841 MachineSDNode *New = CurDAG->getMachineNode(RISCV::SD_RV32, DL, MVT::Other, 1842 {RegPair, Base, Offset, Chain}); 1843 CurDAG->setNodeMemRefs(New, {cast<MemSDNode>(Node)->getMemOperand()}); 1844 ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); 1845 CurDAG->RemoveDeadNode(Node); 1846 return; 1847 } 1848 case ISD::INTRINSIC_WO_CHAIN: { 1849 unsigned IntNo = Node->getConstantOperandVal(0); 1850 switch (IntNo) { 1851 // By default we do not custom select any intrinsic. 1852 default: 1853 break; 1854 case Intrinsic::riscv_vmsgeu: 1855 case Intrinsic::riscv_vmsge: { 1856 SDValue Src1 = Node->getOperand(1); 1857 SDValue Src2 = Node->getOperand(2); 1858 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu; 1859 bool IsCmpConstant = false; 1860 bool IsCmpMinimum = false; 1861 // Only custom select scalar second operand. 1862 if (Src2.getValueType() != XLenVT) 1863 break; 1864 // Small constants are handled with patterns. 
1865 int64_t CVal = 0; 1866 MVT Src1VT = Src1.getSimpleValueType(); 1867 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1868 IsCmpConstant = true; 1869 CVal = C->getSExtValue(); 1870 if (CVal >= -15 && CVal <= 16) { 1871 if (!IsUnsigned || CVal != 0) 1872 break; 1873 IsCmpMinimum = true; 1874 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue( 1875 Src1VT.getScalarSizeInBits()) 1876 .getSExtValue()) { 1877 IsCmpMinimum = true; 1878 } 1879 } 1880 unsigned VMSLTOpcode, VMNANDOpcode, VMSetOpcode, VMSGTOpcode; 1881 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1882 default: 1883 llvm_unreachable("Unexpected LMUL!"); 1884 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \ 1885 case RISCVVType::lmulenum: \ 1886 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1887 : RISCV::PseudoVMSLT_VX_##suffix; \ 1888 VMSGTOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix \ 1889 : RISCV::PseudoVMSGT_VX_##suffix; \ 1890 break; 1891 CASE_VMSLT_OPCODES(LMUL_F8, MF8) 1892 CASE_VMSLT_OPCODES(LMUL_F4, MF4) 1893 CASE_VMSLT_OPCODES(LMUL_F2, MF2) 1894 CASE_VMSLT_OPCODES(LMUL_1, M1) 1895 CASE_VMSLT_OPCODES(LMUL_2, M2) 1896 CASE_VMSLT_OPCODES(LMUL_4, M4) 1897 CASE_VMSLT_OPCODES(LMUL_8, M8) 1898 #undef CASE_VMSLT_OPCODES 1899 } 1900 // Mask operations use the LMUL from the mask type. 
1901 switch (RISCVTargetLowering::getLMUL(VT)) { 1902 default: 1903 llvm_unreachable("Unexpected LMUL!"); 1904 #define CASE_VMNAND_VMSET_OPCODES(lmulenum, suffix) \ 1905 case RISCVVType::lmulenum: \ 1906 VMNANDOpcode = RISCV::PseudoVMNAND_MM_##suffix; \ 1907 VMSetOpcode = RISCV::PseudoVMSET_M_##suffix; \ 1908 break; 1909 CASE_VMNAND_VMSET_OPCODES(LMUL_F8, B64) 1910 CASE_VMNAND_VMSET_OPCODES(LMUL_F4, B32) 1911 CASE_VMNAND_VMSET_OPCODES(LMUL_F2, B16) 1912 CASE_VMNAND_VMSET_OPCODES(LMUL_1, B8) 1913 CASE_VMNAND_VMSET_OPCODES(LMUL_2, B4) 1914 CASE_VMNAND_VMSET_OPCODES(LMUL_4, B2) 1915 CASE_VMNAND_VMSET_OPCODES(LMUL_8, B1) 1916 #undef CASE_VMNAND_VMSET_OPCODES 1917 } 1918 SDValue SEW = CurDAG->getTargetConstant( 1919 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 1920 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 1921 SDValue VL; 1922 selectVLOp(Node->getOperand(3), VL); 1923 1924 // If vmsge(u) with minimum value, expand it to vmset. 1925 if (IsCmpMinimum) { 1926 ReplaceNode(Node, 1927 CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, MaskSEW)); 1928 return; 1929 } 1930 1931 if (IsCmpConstant) { 1932 SDValue Imm = 1933 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget); 1934 1935 ReplaceNode(Node, CurDAG->getMachineNode(VMSGTOpcode, DL, VT, 1936 {Src1, Imm, VL, SEW})); 1937 return; 1938 } 1939 1940 // Expand to 1941 // vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd 1942 SDValue Cmp = SDValue( 1943 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 1944 0); 1945 ReplaceNode(Node, CurDAG->getMachineNode(VMNANDOpcode, DL, VT, 1946 {Cmp, Cmp, VL, MaskSEW})); 1947 return; 1948 } 1949 case Intrinsic::riscv_vmsgeu_mask: 1950 case Intrinsic::riscv_vmsge_mask: { 1951 SDValue Src1 = Node->getOperand(2); 1952 SDValue Src2 = Node->getOperand(3); 1953 bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; 1954 bool IsCmpConstant = false; 1955 bool IsCmpMinimum = false; 1956 // Only custom select scalar second operand. 
1957 if (Src2.getValueType() != XLenVT) 1958 break; 1959 // Small constants are handled with patterns. 1960 MVT Src1VT = Src1.getSimpleValueType(); 1961 int64_t CVal = 0; 1962 if (auto *C = dyn_cast<ConstantSDNode>(Src2)) { 1963 IsCmpConstant = true; 1964 CVal = C->getSExtValue(); 1965 if (CVal >= -15 && CVal <= 16) { 1966 if (!IsUnsigned || CVal != 0) 1967 break; 1968 IsCmpMinimum = true; 1969 } else if (!IsUnsigned && CVal == APInt::getSignedMinValue( 1970 Src1VT.getScalarSizeInBits()) 1971 .getSExtValue()) { 1972 IsCmpMinimum = true; 1973 } 1974 } 1975 unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, 1976 VMOROpcode, VMSGTMaskOpcode; 1977 switch (RISCVTargetLowering::getLMUL(Src1VT)) { 1978 default: 1979 llvm_unreachable("Unexpected LMUL!"); 1980 #define CASE_VMSLT_OPCODES(lmulenum, suffix) \ 1981 case RISCVVType::lmulenum: \ 1982 VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ 1983 : RISCV::PseudoVMSLT_VX_##suffix; \ 1984 VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ 1985 : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ 1986 VMSGTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSGTU_VX_##suffix##_MASK \ 1987 : RISCV::PseudoVMSGT_VX_##suffix##_MASK; \ 1988 break; 1989 CASE_VMSLT_OPCODES(LMUL_F8, MF8) 1990 CASE_VMSLT_OPCODES(LMUL_F4, MF4) 1991 CASE_VMSLT_OPCODES(LMUL_F2, MF2) 1992 CASE_VMSLT_OPCODES(LMUL_1, M1) 1993 CASE_VMSLT_OPCODES(LMUL_2, M2) 1994 CASE_VMSLT_OPCODES(LMUL_4, M4) 1995 CASE_VMSLT_OPCODES(LMUL_8, M8) 1996 #undef CASE_VMSLT_OPCODES 1997 } 1998 // Mask operations use the LMUL from the mask type. 
1999 switch (RISCVTargetLowering::getLMUL(VT)) { 2000 default: 2001 llvm_unreachable("Unexpected LMUL!"); 2002 #define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ 2003 case RISCVVType::lmulenum: \ 2004 VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ 2005 VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ 2006 VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ 2007 break; 2008 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, B64) 2009 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, B32) 2010 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, B16) 2011 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, B8) 2012 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, B4) 2013 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, B2) 2014 CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, B1) 2015 #undef CASE_VMXOR_VMANDN_VMOR_OPCODES 2016 } 2017 SDValue SEW = CurDAG->getTargetConstant( 2018 Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); 2019 SDValue MaskSEW = CurDAG->getTargetConstant(0, DL, XLenVT); 2020 SDValue VL; 2021 selectVLOp(Node->getOperand(5), VL); 2022 SDValue MaskedOff = Node->getOperand(1); 2023 SDValue Mask = Node->getOperand(4); 2024 2025 // If vmsge(u) with minimum value, expand it to vmor mask, maskedoff. 2026 if (IsCmpMinimum) { 2027 // We don't need vmor if the MaskedOff and the Mask are the same 2028 // value. 2029 if (Mask == MaskedOff) { 2030 ReplaceUses(Node, Mask.getNode()); 2031 return; 2032 } 2033 ReplaceNode(Node, 2034 CurDAG->getMachineNode(VMOROpcode, DL, VT, 2035 {Mask, MaskedOff, VL, MaskSEW})); 2036 return; 2037 } 2038 2039 // If the MaskedOff value and the Mask are the same value use 2040 // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt 2041 // This avoids needing to copy v0 to vd before starting the next sequence. 
2042 if (Mask == MaskedOff) { 2043 SDValue Cmp = SDValue( 2044 CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 2045 0); 2046 ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, 2047 {Mask, Cmp, VL, MaskSEW})); 2048 return; 2049 } 2050 2051 SDValue PolicyOp = 2052 CurDAG->getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT); 2053 2054 if (IsCmpConstant) { 2055 SDValue Imm = 2056 selectImm(CurDAG, SDLoc(Src2), XLenVT, CVal - 1, *Subtarget); 2057 2058 ReplaceNode(Node, CurDAG->getMachineNode( 2059 VMSGTMaskOpcode, DL, VT, 2060 {MaskedOff, Src1, Imm, Mask, VL, SEW, PolicyOp})); 2061 return; 2062 } 2063 2064 // Otherwise use 2065 // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 2066 // The result is mask undisturbed. 2067 // We use the same instructions to emulate mask agnostic behavior, because 2068 // the agnostic result can be either undisturbed or all 1. 2069 SDValue Cmp = SDValue(CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, 2070 {MaskedOff, Src1, Src2, Mask, 2071 VL, SEW, PolicyOp}), 2072 0); 2073 // vmxor.mm vd, vd, v0 is used to update active value. 2074 ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, 2075 {Cmp, Mask, VL, MaskSEW})); 2076 return; 2077 } 2078 case Intrinsic::riscv_vsetvli: 2079 case Intrinsic::riscv_vsetvlimax: 2080 return selectVSETVLI(Node); 2081 } 2082 break; 2083 } 2084 case ISD::INTRINSIC_W_CHAIN: { 2085 unsigned IntNo = Node->getConstantOperandVal(1); 2086 switch (IntNo) { 2087 // By default we do not custom select any intrinsic. 
2088 default: 2089 break; 2090 case Intrinsic::riscv_vlseg2: 2091 case Intrinsic::riscv_vlseg3: 2092 case Intrinsic::riscv_vlseg4: 2093 case Intrinsic::riscv_vlseg5: 2094 case Intrinsic::riscv_vlseg6: 2095 case Intrinsic::riscv_vlseg7: 2096 case Intrinsic::riscv_vlseg8: { 2097 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2098 /*IsStrided*/ false); 2099 return; 2100 } 2101 case Intrinsic::riscv_vlseg2_mask: 2102 case Intrinsic::riscv_vlseg3_mask: 2103 case Intrinsic::riscv_vlseg4_mask: 2104 case Intrinsic::riscv_vlseg5_mask: 2105 case Intrinsic::riscv_vlseg6_mask: 2106 case Intrinsic::riscv_vlseg7_mask: 2107 case Intrinsic::riscv_vlseg8_mask: { 2108 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2109 /*IsStrided*/ false); 2110 return; 2111 } 2112 case Intrinsic::riscv_vlsseg2: 2113 case Intrinsic::riscv_vlsseg3: 2114 case Intrinsic::riscv_vlsseg4: 2115 case Intrinsic::riscv_vlsseg5: 2116 case Intrinsic::riscv_vlsseg6: 2117 case Intrinsic::riscv_vlsseg7: 2118 case Intrinsic::riscv_vlsseg8: { 2119 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2120 /*IsStrided*/ true); 2121 return; 2122 } 2123 case Intrinsic::riscv_vlsseg2_mask: 2124 case Intrinsic::riscv_vlsseg3_mask: 2125 case Intrinsic::riscv_vlsseg4_mask: 2126 case Intrinsic::riscv_vlsseg5_mask: 2127 case Intrinsic::riscv_vlsseg6_mask: 2128 case Intrinsic::riscv_vlsseg7_mask: 2129 case Intrinsic::riscv_vlsseg8_mask: { 2130 selectVLSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2131 /*IsStrided*/ true); 2132 return; 2133 } 2134 case Intrinsic::riscv_vloxseg2: 2135 case Intrinsic::riscv_vloxseg3: 2136 case Intrinsic::riscv_vloxseg4: 2137 case Intrinsic::riscv_vloxseg5: 2138 case Intrinsic::riscv_vloxseg6: 2139 case Intrinsic::riscv_vloxseg7: 2140 case Intrinsic::riscv_vloxseg8: 2141 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2142 /*IsOrdered*/ true); 2143 return; 2144 case Intrinsic::riscv_vluxseg2: 2145 case Intrinsic::riscv_vluxseg3: 2146 case 
Intrinsic::riscv_vluxseg4: 2147 case Intrinsic::riscv_vluxseg5: 2148 case Intrinsic::riscv_vluxseg6: 2149 case Intrinsic::riscv_vluxseg7: 2150 case Intrinsic::riscv_vluxseg8: 2151 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2152 /*IsOrdered*/ false); 2153 return; 2154 case Intrinsic::riscv_vloxseg2_mask: 2155 case Intrinsic::riscv_vloxseg3_mask: 2156 case Intrinsic::riscv_vloxseg4_mask: 2157 case Intrinsic::riscv_vloxseg5_mask: 2158 case Intrinsic::riscv_vloxseg6_mask: 2159 case Intrinsic::riscv_vloxseg7_mask: 2160 case Intrinsic::riscv_vloxseg8_mask: 2161 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2162 /*IsOrdered*/ true); 2163 return; 2164 case Intrinsic::riscv_vluxseg2_mask: 2165 case Intrinsic::riscv_vluxseg3_mask: 2166 case Intrinsic::riscv_vluxseg4_mask: 2167 case Intrinsic::riscv_vluxseg5_mask: 2168 case Intrinsic::riscv_vluxseg6_mask: 2169 case Intrinsic::riscv_vluxseg7_mask: 2170 case Intrinsic::riscv_vluxseg8_mask: 2171 selectVLXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2172 /*IsOrdered*/ false); 2173 return; 2174 case Intrinsic::riscv_vlseg8ff: 2175 case Intrinsic::riscv_vlseg7ff: 2176 case Intrinsic::riscv_vlseg6ff: 2177 case Intrinsic::riscv_vlseg5ff: 2178 case Intrinsic::riscv_vlseg4ff: 2179 case Intrinsic::riscv_vlseg3ff: 2180 case Intrinsic::riscv_vlseg2ff: { 2181 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ false); 2182 return; 2183 } 2184 case Intrinsic::riscv_vlseg8ff_mask: 2185 case Intrinsic::riscv_vlseg7ff_mask: 2186 case Intrinsic::riscv_vlseg6ff_mask: 2187 case Intrinsic::riscv_vlseg5ff_mask: 2188 case Intrinsic::riscv_vlseg4ff_mask: 2189 case Intrinsic::riscv_vlseg3ff_mask: 2190 case Intrinsic::riscv_vlseg2ff_mask: { 2191 selectVLSEGFF(Node, getSegInstNF(IntNo), /*IsMasked*/ true); 2192 return; 2193 } 2194 case Intrinsic::riscv_vloxei: 2195 case Intrinsic::riscv_vloxei_mask: 2196 case Intrinsic::riscv_vluxei: 2197 case Intrinsic::riscv_vluxei_mask: { 2198 bool IsMasked = IntNo == 
Intrinsic::riscv_vloxei_mask || 2199 IntNo == Intrinsic::riscv_vluxei_mask; 2200 bool IsOrdered = IntNo == Intrinsic::riscv_vloxei || 2201 IntNo == Intrinsic::riscv_vloxei_mask; 2202 2203 MVT VT = Node->getSimpleValueType(0); 2204 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2205 2206 unsigned CurOp = 2; 2207 SmallVector<SDValue, 8> Operands; 2208 Operands.push_back(Node->getOperand(CurOp++)); 2209 2210 MVT IndexVT; 2211 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2212 /*IsStridedOrIndexed*/ true, Operands, 2213 /*IsLoad=*/true, &IndexVT); 2214 2215 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 2216 "Element count mismatch"); 2217 2218 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2219 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 2220 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 2221 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 2222 report_fatal_error("The V extension does not support EEW=64 for index " 2223 "values when XLEN=32"); 2224 } 2225 const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( 2226 IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), 2227 static_cast<unsigned>(IndexLMUL)); 2228 MachineSDNode *Load = 2229 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2230 2231 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 2232 2233 ReplaceNode(Node, Load); 2234 return; 2235 } 2236 case Intrinsic::riscv_vlm: 2237 case Intrinsic::riscv_vle: 2238 case Intrinsic::riscv_vle_mask: 2239 case Intrinsic::riscv_vlse: 2240 case Intrinsic::riscv_vlse_mask: { 2241 bool IsMasked = IntNo == Intrinsic::riscv_vle_mask || 2242 IntNo == Intrinsic::riscv_vlse_mask; 2243 bool IsStrided = 2244 IntNo == Intrinsic::riscv_vlse || IntNo == Intrinsic::riscv_vlse_mask; 2245 2246 MVT VT = Node->getSimpleValueType(0); 2247 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2248 2249 // The riscv_vlm intrinsic are always 
tail agnostic and no passthru 2250 // operand at the IR level. In pseudos, they have both policy and 2251 // passthru operand. The passthru operand is needed to track the 2252 // "tail undefined" state, and the policy is there just for 2253 // for consistency - it will always be "don't care" for the 2254 // unmasked form. 2255 bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; 2256 unsigned CurOp = 2; 2257 SmallVector<SDValue, 8> Operands; 2258 if (HasPassthruOperand) 2259 Operands.push_back(Node->getOperand(CurOp++)); 2260 else { 2261 // We eagerly lower to implicit_def (instead of undef), as we 2262 // otherwise fail to select nodes such as: nxv1i1 = undef 2263 SDNode *Passthru = 2264 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 2265 Operands.push_back(SDValue(Passthru, 0)); 2266 } 2267 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 2268 Operands, /*IsLoad=*/true); 2269 2270 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2271 const RISCV::VLEPseudo *P = 2272 RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, 2273 static_cast<unsigned>(LMUL)); 2274 MachineSDNode *Load = 2275 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2276 2277 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 2278 2279 ReplaceNode(Node, Load); 2280 return; 2281 } 2282 case Intrinsic::riscv_vleff: 2283 case Intrinsic::riscv_vleff_mask: { 2284 bool IsMasked = IntNo == Intrinsic::riscv_vleff_mask; 2285 2286 MVT VT = Node->getSimpleValueType(0); 2287 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2288 2289 unsigned CurOp = 2; 2290 SmallVector<SDValue, 7> Operands; 2291 Operands.push_back(Node->getOperand(CurOp++)); 2292 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2293 /*IsStridedOrIndexed*/ false, Operands, 2294 /*IsLoad=*/true); 2295 2296 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2297 const RISCV::VLEPseudo *P = 2298 
RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, 2299 Log2SEW, static_cast<unsigned>(LMUL)); 2300 MachineSDNode *Load = CurDAG->getMachineNode( 2301 P->Pseudo, DL, Node->getVTList(), Operands); 2302 CurDAG->setNodeMemRefs(Load, {cast<MemSDNode>(Node)->getMemOperand()}); 2303 2304 ReplaceNode(Node, Load); 2305 return; 2306 } 2307 case Intrinsic::riscv_nds_vln: 2308 case Intrinsic::riscv_nds_vln_mask: 2309 case Intrinsic::riscv_nds_vlnu: 2310 case Intrinsic::riscv_nds_vlnu_mask: { 2311 bool IsMasked = IntNo == Intrinsic::riscv_nds_vln_mask || 2312 IntNo == Intrinsic::riscv_nds_vlnu_mask; 2313 bool IsUnsigned = IntNo == Intrinsic::riscv_nds_vlnu || 2314 IntNo == Intrinsic::riscv_nds_vlnu_mask; 2315 2316 MVT VT = Node->getSimpleValueType(0); 2317 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2318 unsigned CurOp = 2; 2319 SmallVector<SDValue, 8> Operands; 2320 2321 Operands.push_back(Node->getOperand(CurOp++)); 2322 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2323 /*IsStridedOrIndexed=*/false, Operands, 2324 /*IsLoad=*/true); 2325 2326 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2327 const RISCV::NDSVLNPseudo *P = RISCV::getNDSVLNPseudo( 2328 IsMasked, IsUnsigned, Log2SEW, static_cast<unsigned>(LMUL)); 2329 MachineSDNode *Load = 2330 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2331 2332 if (auto *MemOp = dyn_cast<MemSDNode>(Node)) 2333 CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); 2334 2335 ReplaceNode(Node, Load); 2336 return; 2337 } 2338 } 2339 break; 2340 } 2341 case ISD::INTRINSIC_VOID: { 2342 unsigned IntNo = Node->getConstantOperandVal(1); 2343 switch (IntNo) { 2344 case Intrinsic::riscv_vsseg2: 2345 case Intrinsic::riscv_vsseg3: 2346 case Intrinsic::riscv_vsseg4: 2347 case Intrinsic::riscv_vsseg5: 2348 case Intrinsic::riscv_vsseg6: 2349 case Intrinsic::riscv_vsseg7: 2350 case Intrinsic::riscv_vsseg8: { 2351 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2352 
/*IsStrided*/ false); 2353 return; 2354 } 2355 case Intrinsic::riscv_vsseg2_mask: 2356 case Intrinsic::riscv_vsseg3_mask: 2357 case Intrinsic::riscv_vsseg4_mask: 2358 case Intrinsic::riscv_vsseg5_mask: 2359 case Intrinsic::riscv_vsseg6_mask: 2360 case Intrinsic::riscv_vsseg7_mask: 2361 case Intrinsic::riscv_vsseg8_mask: { 2362 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2363 /*IsStrided*/ false); 2364 return; 2365 } 2366 case Intrinsic::riscv_vssseg2: 2367 case Intrinsic::riscv_vssseg3: 2368 case Intrinsic::riscv_vssseg4: 2369 case Intrinsic::riscv_vssseg5: 2370 case Intrinsic::riscv_vssseg6: 2371 case Intrinsic::riscv_vssseg7: 2372 case Intrinsic::riscv_vssseg8: { 2373 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2374 /*IsStrided*/ true); 2375 return; 2376 } 2377 case Intrinsic::riscv_vssseg2_mask: 2378 case Intrinsic::riscv_vssseg3_mask: 2379 case Intrinsic::riscv_vssseg4_mask: 2380 case Intrinsic::riscv_vssseg5_mask: 2381 case Intrinsic::riscv_vssseg6_mask: 2382 case Intrinsic::riscv_vssseg7_mask: 2383 case Intrinsic::riscv_vssseg8_mask: { 2384 selectVSSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2385 /*IsStrided*/ true); 2386 return; 2387 } 2388 case Intrinsic::riscv_vsoxseg2: 2389 case Intrinsic::riscv_vsoxseg3: 2390 case Intrinsic::riscv_vsoxseg4: 2391 case Intrinsic::riscv_vsoxseg5: 2392 case Intrinsic::riscv_vsoxseg6: 2393 case Intrinsic::riscv_vsoxseg7: 2394 case Intrinsic::riscv_vsoxseg8: 2395 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2396 /*IsOrdered*/ true); 2397 return; 2398 case Intrinsic::riscv_vsuxseg2: 2399 case Intrinsic::riscv_vsuxseg3: 2400 case Intrinsic::riscv_vsuxseg4: 2401 case Intrinsic::riscv_vsuxseg5: 2402 case Intrinsic::riscv_vsuxseg6: 2403 case Intrinsic::riscv_vsuxseg7: 2404 case Intrinsic::riscv_vsuxseg8: 2405 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ false, 2406 /*IsOrdered*/ false); 2407 return; 2408 case Intrinsic::riscv_vsoxseg2_mask: 2409 case 
Intrinsic::riscv_vsoxseg3_mask: 2410 case Intrinsic::riscv_vsoxseg4_mask: 2411 case Intrinsic::riscv_vsoxseg5_mask: 2412 case Intrinsic::riscv_vsoxseg6_mask: 2413 case Intrinsic::riscv_vsoxseg7_mask: 2414 case Intrinsic::riscv_vsoxseg8_mask: 2415 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2416 /*IsOrdered*/ true); 2417 return; 2418 case Intrinsic::riscv_vsuxseg2_mask: 2419 case Intrinsic::riscv_vsuxseg3_mask: 2420 case Intrinsic::riscv_vsuxseg4_mask: 2421 case Intrinsic::riscv_vsuxseg5_mask: 2422 case Intrinsic::riscv_vsuxseg6_mask: 2423 case Intrinsic::riscv_vsuxseg7_mask: 2424 case Intrinsic::riscv_vsuxseg8_mask: 2425 selectVSXSEG(Node, getSegInstNF(IntNo), /*IsMasked*/ true, 2426 /*IsOrdered*/ false); 2427 return; 2428 case Intrinsic::riscv_vsoxei: 2429 case Intrinsic::riscv_vsoxei_mask: 2430 case Intrinsic::riscv_vsuxei: 2431 case Intrinsic::riscv_vsuxei_mask: { 2432 bool IsMasked = IntNo == Intrinsic::riscv_vsoxei_mask || 2433 IntNo == Intrinsic::riscv_vsuxei_mask; 2434 bool IsOrdered = IntNo == Intrinsic::riscv_vsoxei || 2435 IntNo == Intrinsic::riscv_vsoxei_mask; 2436 2437 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 2438 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2439 2440 unsigned CurOp = 2; 2441 SmallVector<SDValue, 8> Operands; 2442 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
2443 2444 MVT IndexVT; 2445 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, 2446 /*IsStridedOrIndexed*/ true, Operands, 2447 /*IsLoad=*/false, &IndexVT); 2448 2449 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() && 2450 "Element count mismatch"); 2451 2452 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2453 RISCVVType::VLMUL IndexLMUL = RISCVTargetLowering::getLMUL(IndexVT); 2454 unsigned IndexLog2EEW = Log2_32(IndexVT.getScalarSizeInBits()); 2455 if (IndexLog2EEW == 6 && !Subtarget->is64Bit()) { 2456 report_fatal_error("The V extension does not support EEW=64 for index " 2457 "values when XLEN=32"); 2458 } 2459 const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( 2460 IsMasked, IsOrdered, IndexLog2EEW, 2461 static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); 2462 MachineSDNode *Store = 2463 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2464 2465 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()}); 2466 2467 ReplaceNode(Node, Store); 2468 return; 2469 } 2470 case Intrinsic::riscv_vsm: 2471 case Intrinsic::riscv_vse: 2472 case Intrinsic::riscv_vse_mask: 2473 case Intrinsic::riscv_vsse: 2474 case Intrinsic::riscv_vsse_mask: { 2475 bool IsMasked = IntNo == Intrinsic::riscv_vse_mask || 2476 IntNo == Intrinsic::riscv_vsse_mask; 2477 bool IsStrided = 2478 IntNo == Intrinsic::riscv_vsse || IntNo == Intrinsic::riscv_vsse_mask; 2479 2480 MVT VT = Node->getOperand(2)->getSimpleValueType(0); 2481 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2482 2483 unsigned CurOp = 2; 2484 SmallVector<SDValue, 8> Operands; 2485 Operands.push_back(Node->getOperand(CurOp++)); // Store value. 
2486 2487 addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, 2488 Operands); 2489 2490 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2491 const RISCV::VSEPseudo *P = RISCV::getVSEPseudo( 2492 IsMasked, IsStrided, Log2SEW, static_cast<unsigned>(LMUL)); 2493 MachineSDNode *Store = 2494 CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); 2495 CurDAG->setNodeMemRefs(Store, {cast<MemSDNode>(Node)->getMemOperand()}); 2496 2497 ReplaceNode(Node, Store); 2498 return; 2499 } 2500 case Intrinsic::riscv_sf_vc_x_se: 2501 case Intrinsic::riscv_sf_vc_i_se: 2502 selectSF_VC_X_SE(Node); 2503 return; 2504 } 2505 break; 2506 } 2507 case ISD::BITCAST: { 2508 MVT SrcVT = Node->getOperand(0).getSimpleValueType(); 2509 // Just drop bitcasts between vectors if both are fixed or both are 2510 // scalable. 2511 if ((VT.isScalableVector() && SrcVT.isScalableVector()) || 2512 (VT.isFixedLengthVector() && SrcVT.isFixedLengthVector())) { 2513 ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); 2514 CurDAG->RemoveDeadNode(Node); 2515 return; 2516 } 2517 break; 2518 } 2519 case ISD::INSERT_SUBVECTOR: 2520 case RISCVISD::TUPLE_INSERT: { 2521 SDValue V = Node->getOperand(0); 2522 SDValue SubV = Node->getOperand(1); 2523 SDLoc DL(SubV); 2524 auto Idx = Node->getConstantOperandVal(2); 2525 MVT SubVecVT = SubV.getSimpleValueType(); 2526 2527 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2528 MVT SubVecContainerVT = SubVecVT; 2529 // Establish the correct scalable-vector types for any fixed-length type. 
2530 if (SubVecVT.isFixedLengthVector()) { 2531 SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT); 2532 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock); 2533 [[maybe_unused]] bool ExactlyVecRegSized = 2534 Subtarget->expandVScale(SubVecVT.getSizeInBits()) 2535 .isKnownMultipleOf(Subtarget->expandVScale(VecRegSize)); 2536 assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits()) 2537 .getKnownMinValue())); 2538 assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef())); 2539 } 2540 MVT ContainerVT = VT; 2541 if (VT.isFixedLengthVector()) 2542 ContainerVT = TLI.getContainerForFixedLengthVector(VT); 2543 2544 const auto *TRI = Subtarget->getRegisterInfo(); 2545 unsigned SubRegIdx; 2546 std::tie(SubRegIdx, Idx) = 2547 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2548 ContainerVT, SubVecContainerVT, Idx, TRI); 2549 2550 // If the Idx hasn't been completely eliminated then this is a subvector 2551 // insert which doesn't naturally align to a vector register. These must 2552 // be handled using instructions to manipulate the vector registers. 2553 if (Idx != 0) 2554 break; 2555 2556 RISCVVType::VLMUL SubVecLMUL = 2557 RISCVTargetLowering::getLMUL(SubVecContainerVT); 2558 [[maybe_unused]] bool IsSubVecPartReg = 2559 SubVecLMUL == RISCVVType::VLMUL::LMUL_F2 || 2560 SubVecLMUL == RISCVVType::VLMUL::LMUL_F4 || 2561 SubVecLMUL == RISCVVType::VLMUL::LMUL_F8; 2562 assert((V.getValueType().isRISCVVectorTuple() || !IsSubVecPartReg || 2563 V.isUndef()) && 2564 "Expecting lowering to have created legal INSERT_SUBVECTORs when " 2565 "the subvector is smaller than a full-sized register"); 2566 2567 // If we haven't set a SubRegIdx, then we must be going between 2568 // equally-sized LMUL groups (e.g. VR -> VR). This can be done as a copy. 
2569 if (SubRegIdx == RISCV::NoSubRegister) { 2570 unsigned InRegClassID = 2571 RISCVTargetLowering::getRegClassIDForVecVT(ContainerVT); 2572 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2573 InRegClassID && 2574 "Unexpected subvector extraction"); 2575 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2576 SDNode *NewNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 2577 DL, VT, SubV, RC); 2578 ReplaceNode(Node, NewNode); 2579 return; 2580 } 2581 2582 SDValue Insert = CurDAG->getTargetInsertSubreg(SubRegIdx, DL, VT, V, SubV); 2583 ReplaceNode(Node, Insert.getNode()); 2584 return; 2585 } 2586 case ISD::EXTRACT_SUBVECTOR: 2587 case RISCVISD::TUPLE_EXTRACT: { 2588 SDValue V = Node->getOperand(0); 2589 auto Idx = Node->getConstantOperandVal(1); 2590 MVT InVT = V.getSimpleValueType(); 2591 SDLoc DL(V); 2592 2593 const RISCVTargetLowering &TLI = *Subtarget->getTargetLowering(); 2594 MVT SubVecContainerVT = VT; 2595 // Establish the correct scalable-vector types for any fixed-length type. 2596 if (VT.isFixedLengthVector()) { 2597 assert(Idx == 0); 2598 SubVecContainerVT = TLI.getContainerForFixedLengthVector(VT); 2599 } 2600 if (InVT.isFixedLengthVector()) 2601 InVT = TLI.getContainerForFixedLengthVector(InVT); 2602 2603 const auto *TRI = Subtarget->getRegisterInfo(); 2604 unsigned SubRegIdx; 2605 std::tie(SubRegIdx, Idx) = 2606 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs( 2607 InVT, SubVecContainerVT, Idx, TRI); 2608 2609 // If the Idx hasn't been completely eliminated then this is a subvector 2610 // extract which doesn't naturally align to a vector register. These must 2611 // be handled using instructions to manipulate the vector registers. 2612 if (Idx != 0) 2613 break; 2614 2615 // If we haven't set a SubRegIdx, then we must be going between 2616 // equally-sized LMUL types (e.g. VR -> VR). This can be done as a copy. 
2617 if (SubRegIdx == RISCV::NoSubRegister) { 2618 unsigned InRegClassID = RISCVTargetLowering::getRegClassIDForVecVT(InVT); 2619 assert(RISCVTargetLowering::getRegClassIDForVecVT(SubVecContainerVT) == 2620 InRegClassID && 2621 "Unexpected subvector extraction"); 2622 SDValue RC = CurDAG->getTargetConstant(InRegClassID, DL, XLenVT); 2623 SDNode *NewNode = 2624 CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 2625 ReplaceNode(Node, NewNode); 2626 return; 2627 } 2628 2629 SDValue Extract = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, V); 2630 ReplaceNode(Node, Extract.getNode()); 2631 return; 2632 } 2633 case RISCVISD::VMV_S_X_VL: 2634 case RISCVISD::VFMV_S_F_VL: 2635 case RISCVISD::VMV_V_X_VL: 2636 case RISCVISD::VFMV_V_F_VL: { 2637 // Try to match splat of a scalar load to a strided load with stride of x0. 2638 bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || 2639 Node->getOpcode() == RISCVISD::VFMV_S_F_VL; 2640 if (!Node->getOperand(0).isUndef()) 2641 break; 2642 SDValue Src = Node->getOperand(1); 2643 auto *Ld = dyn_cast<LoadSDNode>(Src); 2644 // Can't fold load update node because the second 2645 // output is used so that load update node can't be removed. 2646 if (!Ld || Ld->isIndexed()) 2647 break; 2648 EVT MemVT = Ld->getMemoryVT(); 2649 // The memory VT should be the same size as the element type. 2650 if (MemVT.getStoreSize() != VT.getVectorElementType().getStoreSize()) 2651 break; 2652 if (!IsProfitableToFold(Src, Node, Node) || 2653 !IsLegalToFold(Src, Node, Node, TM.getOptLevel())) 2654 break; 2655 2656 SDValue VL; 2657 if (IsScalarMove) { 2658 // We could deal with more VL if we update the VSETVLI insert pass to 2659 // avoid introducing more VSETVLI. 
2660 if (!isOneConstant(Node->getOperand(2))) 2661 break; 2662 selectVLOp(Node->getOperand(2), VL); 2663 } else 2664 selectVLOp(Node->getOperand(2), VL); 2665 2666 unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); 2667 SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); 2668 2669 // If VL=1, then we don't need to do a strided load and can just do a 2670 // regular load. 2671 bool IsStrided = !isOneConstant(VL); 2672 2673 // Only do a strided load if we have optimized zero-stride vector load. 2674 if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) 2675 break; 2676 2677 SmallVector<SDValue> Operands = { 2678 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0), 2679 Ld->getBasePtr()}; 2680 if (IsStrided) 2681 Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); 2682 uint64_t Policy = RISCVVType::MASK_AGNOSTIC | RISCVVType::TAIL_AGNOSTIC; 2683 SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); 2684 Operands.append({VL, SEW, PolicyOp, Ld->getChain()}); 2685 2686 RISCVVType::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); 2687 const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( 2688 /*IsMasked*/ false, IsStrided, /*FF*/ false, 2689 Log2SEW, static_cast<unsigned>(LMUL)); 2690 MachineSDNode *Load = 2691 CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); 2692 // Update the chain. 2693 ReplaceUses(Src.getValue(1), SDValue(Load, 1)); 2694 // Record the mem-refs 2695 CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); 2696 // Replace the splat with the vlse. 
2697 ReplaceNode(Node, Load); 2698 return; 2699 } 2700 case ISD::PREFETCH: 2701 unsigned Locality = Node->getConstantOperandVal(3); 2702 if (Locality > 2) 2703 break; 2704 2705 auto *LoadStoreMem = cast<MemSDNode>(Node); 2706 MachineMemOperand *MMO = LoadStoreMem->getMemOperand(); 2707 MMO->setFlags(MachineMemOperand::MONonTemporal); 2708 2709 int NontemporalLevel = 0; 2710 switch (Locality) { 2711 case 0: 2712 NontemporalLevel = 3; // NTL.ALL 2713 break; 2714 case 1: 2715 NontemporalLevel = 1; // NTL.PALL 2716 break; 2717 case 2: 2718 NontemporalLevel = 0; // NTL.P1 2719 break; 2720 default: 2721 llvm_unreachable("unexpected locality value."); 2722 } 2723 2724 if (NontemporalLevel & 0b1) 2725 MMO->setFlags(MONontemporalBit0); 2726 if (NontemporalLevel & 0b10) 2727 MMO->setFlags(MONontemporalBit1); 2728 break; 2729 } 2730 2731 // Select the default instruction. 2732 SelectCode(Node); 2733 } 2734 2735 bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( 2736 const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, 2737 std::vector<SDValue> &OutOps) { 2738 // Always produce a register and immediate operand, as expected by 2739 // RISCVAsmPrinter::PrintAsmMemoryOperand. 
2740 switch (ConstraintID) { 2741 case InlineAsm::ConstraintCode::o: 2742 case InlineAsm::ConstraintCode::m: { 2743 SDValue Op0, Op1; 2744 [[maybe_unused]] bool Found = SelectAddrRegImm(Op, Op0, Op1); 2745 assert(Found && "SelectAddrRegImm should always succeed"); 2746 OutOps.push_back(Op0); 2747 OutOps.push_back(Op1); 2748 return false; 2749 } 2750 case InlineAsm::ConstraintCode::A: 2751 OutOps.push_back(Op); 2752 OutOps.push_back( 2753 CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT())); 2754 return false; 2755 default: 2756 report_fatal_error("Unexpected asm memory constraint " + 2757 InlineAsm::getMemConstraintName(ConstraintID)); 2758 } 2759 2760 return true; 2761 } 2762 2763 bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, 2764 SDValue &Offset) { 2765 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { 2766 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); 2767 Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); 2768 return true; 2769 } 2770 2771 return false; 2772 } 2773 2774 // Fold constant addresses. 2775 static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, 2776 const MVT VT, const RISCVSubtarget *Subtarget, 2777 SDValue Addr, SDValue &Base, SDValue &Offset, 2778 bool IsPrefetch = false) { 2779 if (!isa<ConstantSDNode>(Addr)) 2780 return false; 2781 2782 int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue(); 2783 2784 // If the constant is a simm12, we can fold the whole constant and use X0 as 2785 // the base. If the constant can be materialized with LUI+simm12, use LUI as 2786 // the base. We can't use generateInstSeq because it favors LUI+ADDIW. 
2787 int64_t Lo12 = SignExtend64<12>(CVal); 2788 int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12; 2789 if (!Subtarget->is64Bit() || isInt<32>(Hi)) { 2790 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2791 return false; 2792 if (Hi) { 2793 int64_t Hi20 = (Hi >> 12) & 0xfffff; 2794 Base = SDValue( 2795 CurDAG->getMachineNode(RISCV::LUI, DL, VT, 2796 CurDAG->getTargetConstant(Hi20, DL, VT)), 2797 0); 2798 } else { 2799 Base = CurDAG->getRegister(RISCV::X0, VT); 2800 } 2801 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT); 2802 return true; 2803 } 2804 2805 // Ask how constant materialization would handle this constant. 2806 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget); 2807 2808 // If the last instruction would be an ADDI, we can fold its immediate and 2809 // emit the rest of the sequence as the base. 2810 if (Seq.back().getOpcode() != RISCV::ADDI) 2811 return false; 2812 Lo12 = Seq.back().getImm(); 2813 if (IsPrefetch && (Lo12 & 0b11111) != 0) 2814 return false; 2815 2816 // Drop the last instruction. 2817 Seq.pop_back(); 2818 assert(!Seq.empty() && "Expected more instructions in sequence"); 2819 2820 Base = selectImmSeq(CurDAG, DL, VT, Seq); 2821 Offset = CurDAG->getSignedTargetConstant(Lo12, DL, VT); 2822 return true; 2823 } 2824 2825 // Is this ADD instruction only used as the base pointer of scalar loads and 2826 // stores? 2827 static bool isWorthFoldingAdd(SDValue Add) { 2828 for (auto *User : Add->users()) { 2829 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE && 2830 User->getOpcode() != ISD::ATOMIC_LOAD && 2831 User->getOpcode() != ISD::ATOMIC_STORE) 2832 return false; 2833 EVT VT = cast<MemSDNode>(User)->getMemoryVT(); 2834 if (!VT.isScalarInteger() && VT != MVT::f16 && VT != MVT::f32 && 2835 VT != MVT::f64) 2836 return false; 2837 // Don't allow stores of the value. It must be used as the address. 
2838 if (User->getOpcode() == ISD::STORE && 2839 cast<StoreSDNode>(User)->getValue() == Add) 2840 return false; 2841 if (User->getOpcode() == ISD::ATOMIC_STORE && 2842 cast<AtomicSDNode>(User)->getVal() == Add) 2843 return false; 2844 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering())) 2845 return false; 2846 } 2847 2848 return true; 2849 } 2850 2851 bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, 2852 SDValue &Offset) { 2853 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2854 return true; 2855 2856 SDLoc DL(Addr); 2857 MVT VT = Addr.getSimpleValueType(); 2858 2859 if (Addr.getOpcode() == RISCVISD::ADD_LO) { 2860 Base = Addr.getOperand(0); 2861 Offset = Addr.getOperand(1); 2862 return true; 2863 } 2864 2865 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2866 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2867 if (isInt<12>(CVal) && isInt<12>(CVal)) { 2868 Base = Addr.getOperand(0); 2869 if (Base.getOpcode() == RISCVISD::ADD_LO) { 2870 SDValue LoOperand = Base.getOperand(1); 2871 if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { 2872 // If the Lo in (ADD_LO hi, lo) is a global variable's address 2873 // (its low part, really), then we can rely on the alignment of that 2874 // variable to provide a margin of safety before low part can overflow 2875 // the 12 bits of the load/store offset. Check if CVal falls within 2876 // that margin; if so (low part + CVal) can't overflow. 
2877 const DataLayout &DL = CurDAG->getDataLayout(); 2878 Align Alignment = commonAlignment( 2879 GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); 2880 if ((CVal == 0 || Alignment > CVal)) { 2881 int64_t CombinedOffset = CVal + GA->getOffset(); 2882 Base = Base.getOperand(0); 2883 Offset = CurDAG->getTargetGlobalAddress( 2884 GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), 2885 CombinedOffset, GA->getTargetFlags()); 2886 return true; 2887 } 2888 } 2889 } 2890 2891 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2892 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2893 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); 2894 return true; 2895 } 2896 } 2897 2898 // Handle ADD with large immediates. 2899 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2900 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2901 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 2902 2903 // Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use 2904 // an ADDI for part of the offset and fold the rest into the load/store. 2905 // This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. 2906 if (CVal >= -4096 && CVal <= 4094) { 2907 int64_t Adj = CVal < 0 ? -2048 : 2047; 2908 Base = SDValue( 2909 CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), 2910 CurDAG->getSignedTargetConstant(Adj, DL, VT)), 2911 0); 2912 Offset = CurDAG->getSignedTargetConstant(CVal - Adj, DL, VT); 2913 return true; 2914 } 2915 2916 // For larger immediates, we might be able to save one instruction from 2917 // constant materialization by folding the Lo12 bits of the immediate into 2918 // the address. We should only do this if the ADD is only used by loads and 2919 // stores that can fold the lo12 bits. Otherwise, the ADD will get iseled 2920 // separately with the full materialized immediate creating extra 2921 // instructions. 
2922 if (isWorthFoldingAdd(Addr) && 2923 selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 2924 Offset, /*IsPrefetch=*/false)) { 2925 // Insert an ADD instruction with the materialized Hi52 bits. 2926 Base = SDValue( 2927 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 2928 0); 2929 return true; 2930 } 2931 } 2932 2933 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, 2934 /*IsPrefetch=*/false)) 2935 return true; 2936 2937 Base = Addr; 2938 Offset = CurDAG->getTargetConstant(0, DL, VT); 2939 return true; 2940 } 2941 2942 /// Similar to SelectAddrRegImm, except that the offset is restricted to uimm9. 2943 bool RISCVDAGToDAGISel::SelectAddrRegImm9(SDValue Addr, SDValue &Base, 2944 SDValue &Offset) { 2945 // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only 2946 // a 9-bit immediate can be folded. 2947 2948 SDLoc DL(Addr); 2949 MVT VT = Addr.getSimpleValueType(); 2950 2951 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2952 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2953 if (isUInt<9>(CVal)) { 2954 Base = Addr.getOperand(0); 2955 2956 // FIXME: Support FrameIndex. Need to teach eliminateFrameIndex that only 2957 // a 9-bit immediate can be folded. 2958 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); 2959 return true; 2960 } 2961 } 2962 2963 Base = Addr; 2964 Offset = CurDAG->getTargetConstant(0, DL, VT); 2965 return true; 2966 } 2967 2968 /// Similar to SelectAddrRegImm, except that the least significant 5 bits of 2969 /// Offset should be all zeros. 
2970 bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base, 2971 SDValue &Offset) { 2972 if (SelectAddrFrameIndex(Addr, Base, Offset)) 2973 return true; 2974 2975 SDLoc DL(Addr); 2976 MVT VT = Addr.getSimpleValueType(); 2977 2978 if (CurDAG->isBaseWithConstantOffset(Addr)) { 2979 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 2980 if (isInt<12>(CVal)) { 2981 Base = Addr.getOperand(0); 2982 2983 // Early-out if not a valid offset. 2984 if ((CVal & 0b11111) != 0) { 2985 Base = Addr; 2986 Offset = CurDAG->getTargetConstant(0, DL, VT); 2987 return true; 2988 } 2989 2990 if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) 2991 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); 2992 Offset = CurDAG->getSignedTargetConstant(CVal, DL, VT); 2993 return true; 2994 } 2995 } 2996 2997 // Handle ADD with large immediates. 2998 if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { 2999 int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); 3000 assert(!isInt<12>(CVal) && "simm12 not already handled?"); 3001 3002 // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save 3003 // one instruction by folding adjustment (-2048 or 2016) into the address. 3004 if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) { 3005 int64_t Adj = CVal < 0 ? -2048 : 2016; 3006 int64_t AdjustedOffset = CVal - Adj; 3007 Base = 3008 SDValue(CurDAG->getMachineNode( 3009 RISCV::ADDI, DL, VT, Addr.getOperand(0), 3010 CurDAG->getSignedTargetConstant(AdjustedOffset, DL, VT)), 3011 0); 3012 Offset = CurDAG->getSignedTargetConstant(Adj, DL, VT); 3013 return true; 3014 } 3015 3016 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base, 3017 Offset, /*IsPrefetch=*/true)) { 3018 // Insert an ADD instruction with the materialized Hi52 bits. 
3019 Base = SDValue( 3020 CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base), 3021 0); 3022 return true; 3023 } 3024 } 3025 3026 if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, 3027 /*IsPrefetch=*/true)) 3028 return true; 3029 3030 Base = Addr; 3031 Offset = CurDAG->getTargetConstant(0, DL, VT); 3032 return true; 3033 } 3034 3035 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr, 3036 unsigned MaxShiftAmount, 3037 SDValue &Base, SDValue &Index, 3038 SDValue &Scale) { 3039 EVT VT = Addr.getSimpleValueType(); 3040 auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index, 3041 SDValue &Shift) { 3042 uint64_t ShiftAmt = 0; 3043 Index = N; 3044 3045 if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) { 3046 // Only match shifts by a value in range [0, MaxShiftAmount]. 3047 if (N.getConstantOperandVal(1) <= MaxShiftAmount) { 3048 Index = N.getOperand(0); 3049 ShiftAmt = N.getConstantOperandVal(1); 3050 } 3051 } 3052 3053 Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT); 3054 return ShiftAmt != 0; 3055 }; 3056 3057 if (Addr.getOpcode() == ISD::ADD) { 3058 if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { 3059 SDValue AddrB = Addr.getOperand(0); 3060 if (AddrB.getOpcode() == ISD::ADD && 3061 UnwrapShl(AddrB.getOperand(0), Index, Scale) && 3062 !isa<ConstantSDNode>(AddrB.getOperand(1)) && 3063 isInt<12>(C1->getSExtValue())) { 3064 // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2)) 3065 SDValue C1Val = 3066 CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT); 3067 Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT, 3068 AddrB.getOperand(1), C1Val), 3069 0); 3070 return true; 3071 } 3072 } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) { 3073 Base = Addr.getOperand(1); 3074 return true; 3075 } else { 3076 UnwrapShl(Addr.getOperand(1), Index, Scale); 3077 Base = Addr.getOperand(0); 3078 return true; 3079 } 3080 } 3081 3082 return 
false; 3083 } 3084 3085 bool RISCVDAGToDAGISel::SelectAddrRegReg(SDValue Addr, SDValue &Base, 3086 SDValue &Offset) { 3087 if (Addr.getOpcode() != ISD::ADD) 3088 return false; 3089 3090 if (isa<ConstantSDNode>(Addr.getOperand(1))) 3091 return false; 3092 3093 Base = Addr.getOperand(0); 3094 Offset = Addr.getOperand(1); 3095 return true; 3096 } 3097 3098 bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, 3099 SDValue &ShAmt) { 3100 ShAmt = N; 3101 3102 // Peek through zext. 3103 if (ShAmt->getOpcode() == ISD::ZERO_EXTEND) 3104 ShAmt = ShAmt.getOperand(0); 3105 3106 // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift 3107 // amount. If there is an AND on the shift amount, we can bypass it if it 3108 // doesn't affect any of those bits. 3109 if (ShAmt.getOpcode() == ISD::AND && 3110 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 3111 const APInt &AndMask = ShAmt.getConstantOperandAPInt(1); 3112 3113 // Since the max shift amount is a power of 2 we can subtract 1 to make a 3114 // mask that covers the bits needed to represent all shift amounts. 3115 assert(isPowerOf2_32(ShiftWidth) && "Unexpected max shift amount!"); 3116 APInt ShMask(AndMask.getBitWidth(), ShiftWidth - 1); 3117 3118 if (ShMask.isSubsetOf(AndMask)) { 3119 ShAmt = ShAmt.getOperand(0); 3120 } else { 3121 // SimplifyDemandedBits may have optimized the mask so try restoring any 3122 // bits that are known zero. 3123 KnownBits Known = CurDAG->computeKnownBits(ShAmt.getOperand(0)); 3124 if (!ShMask.isSubsetOf(AndMask | Known.Zero)) 3125 return true; 3126 ShAmt = ShAmt.getOperand(0); 3127 } 3128 } 3129 3130 if (ShAmt.getOpcode() == ISD::ADD && 3131 isa<ConstantSDNode>(ShAmt.getOperand(1))) { 3132 uint64_t Imm = ShAmt.getConstantOperandVal(1); 3133 // If we are shifting by X+N where N == 0 mod Size, then just shift by X 3134 // to avoid the ADD. 
3135 if (Imm != 0 && Imm % ShiftWidth == 0) { 3136 ShAmt = ShAmt.getOperand(0); 3137 return true; 3138 } 3139 } else if (ShAmt.getOpcode() == ISD::SUB && 3140 isa<ConstantSDNode>(ShAmt.getOperand(0))) { 3141 uint64_t Imm = ShAmt.getConstantOperandVal(0); 3142 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 3143 // generate a NEG instead of a SUB of a constant. 3144 if (Imm != 0 && Imm % ShiftWidth == 0) { 3145 SDLoc DL(ShAmt); 3146 EVT VT = ShAmt.getValueType(); 3147 SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); 3148 unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; 3149 MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, 3150 ShAmt.getOperand(1)); 3151 ShAmt = SDValue(Neg, 0); 3152 return true; 3153 } 3154 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X 3155 // to generate a NOT instead of a SUB of a constant. 3156 if (Imm % ShiftWidth == ShiftWidth - 1) { 3157 SDLoc DL(ShAmt); 3158 EVT VT = ShAmt.getValueType(); 3159 MachineSDNode *Not = CurDAG->getMachineNode( 3160 RISCV::XORI, DL, VT, ShAmt.getOperand(1), 3161 CurDAG->getAllOnesConstant(DL, VT, /*isTarget=*/true)); 3162 ShAmt = SDValue(Not, 0); 3163 return true; 3164 } 3165 } 3166 3167 return true; 3168 } 3169 3170 /// RISC-V doesn't have general instructions for integer setne/seteq, but we can 3171 /// check for equality with 0. This function emits instructions that convert the 3172 /// seteq/setne into something that can be compared with 0. 3173 /// \p ExpectedCCVal indicates the condition code to attempt to match (e.g. 3174 /// ISD::SETNE). 3175 bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, 3176 SDValue &Val) { 3177 assert(ISD::isIntEqualitySetCC(ExpectedCCVal) && 3178 "Unexpected condition code!"); 3179 3180 // We're looking for a setcc. 3181 if (N->getOpcode() != ISD::SETCC) 3182 return false; 3183 3184 // Must be an equality comparison. 
3185 ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get(); 3186 if (CCVal != ExpectedCCVal) 3187 return false; 3188 3189 SDValue LHS = N->getOperand(0); 3190 SDValue RHS = N->getOperand(1); 3191 3192 if (!LHS.getValueType().isScalarInteger()) 3193 return false; 3194 3195 // If the RHS side is 0, we don't need any extra instructions, return the LHS. 3196 if (isNullConstant(RHS)) { 3197 Val = LHS; 3198 return true; 3199 } 3200 3201 SDLoc DL(N); 3202 3203 if (auto *C = dyn_cast<ConstantSDNode>(RHS)) { 3204 int64_t CVal = C->getSExtValue(); 3205 // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and 3206 // non-zero otherwise. 3207 if (CVal == -2048) { 3208 Val = SDValue( 3209 CurDAG->getMachineNode( 3210 RISCV::XORI, DL, N->getValueType(0), LHS, 3211 CurDAG->getSignedTargetConstant(CVal, DL, N->getValueType(0))), 3212 0); 3213 return true; 3214 } 3215 // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the 3216 // LHS is equal to the RHS and non-zero otherwise. 3217 if (isInt<12>(CVal) || CVal == 2048) { 3218 Val = SDValue( 3219 CurDAG->getMachineNode( 3220 RISCV::ADDI, DL, N->getValueType(0), LHS, 3221 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))), 3222 0); 3223 return true; 3224 } 3225 if (isPowerOf2_64(CVal) && Subtarget->hasStdExtZbs()) { 3226 Val = SDValue( 3227 CurDAG->getMachineNode( 3228 RISCV::BINVI, DL, N->getValueType(0), LHS, 3229 CurDAG->getTargetConstant(Log2_64(CVal), DL, N->getValueType(0))), 3230 0); 3231 return true; 3232 } 3233 // Same as the addi case above but for larger immediates (signed 26-bit) use 3234 // the QC_E_ADDI instruction from the Xqcilia extension, if available. Avoid 3235 // anything which can be done with a single lui as it might be compressible. 
3236 if (Subtarget->hasVendorXqcilia() && isInt<26>(CVal) && 3237 (CVal & 0xFFF) != 0) { 3238 Val = SDValue( 3239 CurDAG->getMachineNode( 3240 RISCV::QC_E_ADDI, DL, N->getValueType(0), LHS, 3241 CurDAG->getSignedTargetConstant(-CVal, DL, N->getValueType(0))), 3242 0); 3243 return true; 3244 } 3245 } 3246 3247 // If nothing else we can XOR the LHS and RHS to produce zero if they are 3248 // equal and a non-zero value if they aren't. 3249 Val = SDValue( 3250 CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0); 3251 return true; 3252 } 3253 3254 bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) { 3255 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && 3256 cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) { 3257 Val = N.getOperand(0); 3258 return true; 3259 } 3260 3261 auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) { 3262 if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1))) 3263 return N; 3264 3265 SDValue N0 = N.getOperand(0); 3266 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 3267 N.getConstantOperandVal(1) == ShiftAmt && 3268 N0.getConstantOperandVal(1) == ShiftAmt) 3269 return N0.getOperand(0); 3270 3271 return N; 3272 }; 3273 3274 MVT VT = N.getSimpleValueType(); 3275 if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) { 3276 Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits); 3277 return true; 3278 } 3279 3280 return false; 3281 } 3282 3283 bool RISCVDAGToDAGISel::selectZExtBits(SDValue N, unsigned Bits, SDValue &Val) { 3284 if (N.getOpcode() == ISD::AND) { 3285 auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3286 if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) { 3287 Val = N.getOperand(0); 3288 return true; 3289 } 3290 } 3291 MVT VT = N.getSimpleValueType(); 3292 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), Bits); 3293 if (CurDAG->MaskedValueIsZero(N, Mask)) { 3294 Val = N; 3295 return true; 3296 } 3297 
3298 return false; 3299 } 3300 3301 /// Look for various patterns that can be done with a SHL that can be folded 3302 /// into a SHXADD. \p ShAmt contains 1, 2, or 3 and is set based on which 3303 /// SHXADD we are trying to match. 3304 bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt, 3305 SDValue &Val) { 3306 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) { 3307 SDValue N0 = N.getOperand(0); 3308 3309 if (bool LeftShift = N0.getOpcode() == ISD::SHL; 3310 (LeftShift || N0.getOpcode() == ISD::SRL) && 3311 isa<ConstantSDNode>(N0.getOperand(1))) { 3312 uint64_t Mask = N.getConstantOperandVal(1); 3313 unsigned C2 = N0.getConstantOperandVal(1); 3314 3315 unsigned XLen = Subtarget->getXLen(); 3316 if (LeftShift) 3317 Mask &= maskTrailingZeros<uint64_t>(C2); 3318 else 3319 Mask &= maskTrailingOnes<uint64_t>(XLen - C2); 3320 3321 if (isShiftedMask_64(Mask)) { 3322 unsigned Leading = XLen - llvm::bit_width(Mask); 3323 unsigned Trailing = llvm::countr_zero(Mask); 3324 if (Trailing != ShAmt) 3325 return false; 3326 3327 unsigned Opcode; 3328 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no 3329 // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2 3330 // followed by a SHXADD with c3 for the X amount. 3331 if (LeftShift && Leading == 0 && C2 < Trailing) 3332 Opcode = RISCV::SRLI; 3333 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 32-c2 3334 // leading zeros and c3 trailing zeros. We can use an SRLIW by c3-c2 3335 // followed by a SHXADD with c3 for the X amount. 3336 else if (LeftShift && Leading == 32 - C2 && C2 < Trailing) 3337 Opcode = RISCV::SRLIW; 3338 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2 3339 // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3 3340 // followed by a SHXADD using c3 for the X amount. 
3341 else if (!LeftShift && Leading == C2) 3342 Opcode = RISCV::SRLI; 3343 // Look for (and (shr y, c2), c1) where c1 is a shifted mask with 32+c2 3344 // leading zeros and c3 trailing zeros. We can use an SRLIW by c2+c3 3345 // followed by a SHXADD using c3 for the X amount. 3346 else if (!LeftShift && Leading == 32 + C2) 3347 Opcode = RISCV::SRLIW; 3348 else 3349 return false; 3350 3351 SDLoc DL(N); 3352 EVT VT = N.getValueType(); 3353 ShAmt = LeftShift ? Trailing - C2 : Trailing + C2; 3354 Val = SDValue( 3355 CurDAG->getMachineNode(Opcode, DL, VT, N0.getOperand(0), 3356 CurDAG->getTargetConstant(ShAmt, DL, VT)), 3357 0); 3358 return true; 3359 } 3360 } else if (N0.getOpcode() == ISD::SRA && N0.hasOneUse() && 3361 isa<ConstantSDNode>(N0.getOperand(1))) { 3362 uint64_t Mask = N.getConstantOperandVal(1); 3363 unsigned C2 = N0.getConstantOperandVal(1); 3364 3365 // Look for (and (sra y, c2), c1) where c1 is a shifted mask with c3 3366 // leading zeros and c4 trailing zeros. If c2 is greater than c3, we can 3367 // use (srli (srai y, c2 - c3), c3 + c4) followed by a SHXADD with c4 as 3368 // the X amount. 
3369 if (isShiftedMask_64(Mask)) { 3370 unsigned XLen = Subtarget->getXLen(); 3371 unsigned Leading = XLen - llvm::bit_width(Mask); 3372 unsigned Trailing = llvm::countr_zero(Mask); 3373 if (C2 > Leading && Leading > 0 && Trailing == ShAmt) { 3374 SDLoc DL(N); 3375 EVT VT = N.getValueType(); 3376 Val = SDValue(CurDAG->getMachineNode( 3377 RISCV::SRAI, DL, VT, N0.getOperand(0), 3378 CurDAG->getTargetConstant(C2 - Leading, DL, VT)), 3379 0); 3380 Val = SDValue(CurDAG->getMachineNode( 3381 RISCV::SRLI, DL, VT, Val, 3382 CurDAG->getTargetConstant(Leading + ShAmt, DL, VT)), 3383 0); 3384 return true; 3385 } 3386 } 3387 } 3388 } else if (bool LeftShift = N.getOpcode() == ISD::SHL; 3389 (LeftShift || N.getOpcode() == ISD::SRL) && 3390 isa<ConstantSDNode>(N.getOperand(1))) { 3391 SDValue N0 = N.getOperand(0); 3392 if (N0.getOpcode() == ISD::AND && N0.hasOneUse() && 3393 isa<ConstantSDNode>(N0.getOperand(1))) { 3394 uint64_t Mask = N0.getConstantOperandVal(1); 3395 if (isShiftedMask_64(Mask)) { 3396 unsigned C1 = N.getConstantOperandVal(1); 3397 unsigned XLen = Subtarget->getXLen(); 3398 unsigned Leading = XLen - llvm::bit_width(Mask); 3399 unsigned Trailing = llvm::countr_zero(Mask); 3400 // Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and 3401 // C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD. 3402 if (LeftShift && Leading == 32 && Trailing > 0 && 3403 (Trailing + C1) == ShAmt) { 3404 SDLoc DL(N); 3405 EVT VT = N.getValueType(); 3406 Val = SDValue(CurDAG->getMachineNode( 3407 RISCV::SRLIW, DL, VT, N0.getOperand(0), 3408 CurDAG->getTargetConstant(Trailing, DL, VT)), 3409 0); 3410 return true; 3411 } 3412 // Look for (srl (and X, Mask), C1) where Mask has 32 leading zeros and 3413 // C3 trailing zeros. If C3-C1==ShAmt we can use SRLIW+SHXADD. 
3414 if (!LeftShift && Leading == 32 && Trailing > C1 && 3415 (Trailing - C1) == ShAmt) { 3416 SDLoc DL(N); 3417 EVT VT = N.getValueType(); 3418 Val = SDValue(CurDAG->getMachineNode( 3419 RISCV::SRLIW, DL, VT, N0.getOperand(0), 3420 CurDAG->getTargetConstant(Trailing, DL, VT)), 3421 0); 3422 return true; 3423 } 3424 } 3425 } 3426 } 3427 3428 return false; 3429 } 3430 3431 /// Look for various patterns that can be done with a SHL that can be folded 3432 /// into a SHXADD_UW. \p ShAmt contains 1, 2, or 3 and is set based on which 3433 /// SHXADD_UW we are trying to match. 3434 bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt, 3435 SDValue &Val) { 3436 if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1)) && 3437 N.hasOneUse()) { 3438 SDValue N0 = N.getOperand(0); 3439 if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) && 3440 N0.hasOneUse()) { 3441 uint64_t Mask = N.getConstantOperandVal(1); 3442 unsigned C2 = N0.getConstantOperandVal(1); 3443 3444 Mask &= maskTrailingZeros<uint64_t>(C2); 3445 3446 // Look for (and (shl y, c2), c1) where c1 is a shifted mask with 3447 // 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by 3448 // c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount. 
3449 if (isShiftedMask_64(Mask)) { 3450 unsigned Leading = llvm::countl_zero(Mask); 3451 unsigned Trailing = llvm::countr_zero(Mask); 3452 if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) { 3453 SDLoc DL(N); 3454 EVT VT = N.getValueType(); 3455 Val = SDValue(CurDAG->getMachineNode( 3456 RISCV::SLLI, DL, VT, N0.getOperand(0), 3457 CurDAG->getTargetConstant(C2 - ShAmt, DL, VT)), 3458 0); 3459 return true; 3460 } 3461 } 3462 } 3463 } 3464 3465 return false; 3466 } 3467 3468 bool RISCVDAGToDAGISel::orDisjoint(const SDNode *N) const { 3469 assert(N->getOpcode() == ISD::OR || N->getOpcode() == RISCVISD::OR_VL); 3470 if (N->getFlags().hasDisjoint()) 3471 return true; 3472 return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1)); 3473 } 3474 3475 bool RISCVDAGToDAGISel::selectImm64IfCheaper(int64_t Imm, int64_t OrigImm, 3476 SDValue N, SDValue &Val) { 3477 int OrigCost = RISCVMatInt::getIntMatCost(APInt(64, OrigImm), 64, *Subtarget, 3478 /*CompressionCost=*/true); 3479 int Cost = RISCVMatInt::getIntMatCost(APInt(64, Imm), 64, *Subtarget, 3480 /*CompressionCost=*/true); 3481 if (OrigCost <= Cost) 3482 return false; 3483 3484 Val = selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), Imm, *Subtarget); 3485 return true; 3486 } 3487 3488 bool RISCVDAGToDAGISel::selectZExtImm32(SDValue N, SDValue &Val) { 3489 if (!isa<ConstantSDNode>(N)) 3490 return false; 3491 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue(); 3492 if ((Imm >> 31) != 1) 3493 return false; 3494 3495 for (const SDNode *U : N->users()) { 3496 switch (U->getOpcode()) { 3497 case ISD::ADD: 3498 break; 3499 case ISD::OR: 3500 if (orDisjoint(U)) 3501 break; 3502 return false; 3503 default: 3504 return false; 3505 } 3506 } 3507 3508 return selectImm64IfCheaper(0xffffffff00000000 | Imm, Imm, N, Val); 3509 } 3510 3511 bool RISCVDAGToDAGISel::selectNegImm(SDValue N, SDValue &Val) { 3512 if (!isa<ConstantSDNode>(N)) 3513 return false; 3514 int64_t Imm = 
cast<ConstantSDNode>(N)->getSExtValue(); 3515 if (isInt<32>(Imm)) 3516 return false; 3517 3518 for (const SDNode *U : N->users()) { 3519 switch (U->getOpcode()) { 3520 case ISD::ADD: 3521 break; 3522 case RISCVISD::VMV_V_X_VL: 3523 if (!all_of(U->users(), [](const SDNode *V) { 3524 return V->getOpcode() == ISD::ADD || 3525 V->getOpcode() == RISCVISD::ADD_VL; 3526 })) 3527 return false; 3528 break; 3529 default: 3530 return false; 3531 } 3532 } 3533 3534 return selectImm64IfCheaper(-Imm, Imm, N, Val); 3535 } 3536 3537 bool RISCVDAGToDAGISel::selectInvLogicImm(SDValue N, SDValue &Val) { 3538 if (!isa<ConstantSDNode>(N)) 3539 return false; 3540 int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue(); 3541 3542 // For 32-bit signed constants, we can only substitute LUI+ADDI with LUI. 3543 if (isInt<32>(Imm) && ((Imm & 0xfff) != 0xfff || Imm == -1)) 3544 return false; 3545 3546 // Abandon this transform if the constant is needed elsewhere. 3547 for (const SDNode *U : N->users()) { 3548 switch (U->getOpcode()) { 3549 case ISD::AND: 3550 case ISD::OR: 3551 case ISD::XOR: 3552 if (!(Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb())) 3553 return false; 3554 break; 3555 case RISCVISD::VMV_V_X_VL: 3556 if (!Subtarget->hasStdExtZvkb()) 3557 return false; 3558 if (!all_of(U->users(), [](const SDNode *V) { 3559 return V->getOpcode() == ISD::AND || 3560 V->getOpcode() == RISCVISD::AND_VL; 3561 })) 3562 return false; 3563 break; 3564 default: 3565 return false; 3566 } 3567 } 3568 3569 if (isInt<32>(Imm)) { 3570 Val = 3571 selectImm(CurDAG, SDLoc(N), N->getSimpleValueType(0), ~Imm, *Subtarget); 3572 return true; 3573 } 3574 3575 // For 64-bit constants, the instruction sequences get complex, 3576 // so we select inverted only if it's cheaper. 
3577 return selectImm64IfCheaper(~Imm, Imm, N, Val); 3578 } 3579 3580 static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo, 3581 unsigned Bits, 3582 const TargetInstrInfo *TII) { 3583 unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode()); 3584 3585 if (!MCOpcode) 3586 return false; 3587 3588 const MCInstrDesc &MCID = TII->get(User->getMachineOpcode()); 3589 const uint64_t TSFlags = MCID.TSFlags; 3590 if (!RISCVII::hasSEWOp(TSFlags)) 3591 return false; 3592 assert(RISCVII::hasVLOp(TSFlags)); 3593 3594 unsigned ChainOpIdx = User->getNumOperands() - 1; 3595 bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other; 3596 bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags); 3597 unsigned VLIdx = User->getNumOperands() - HasVecPolicyOp - HasChainOp - 2; 3598 const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1); 3599 3600 if (UserOpNo == VLIdx) 3601 return false; 3602 3603 auto NumDemandedBits = 3604 RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW); 3605 return NumDemandedBits && Bits >= *NumDemandedBits; 3606 } 3607 3608 // Return true if all users of this SDNode* only consume the lower \p Bits. 3609 // This can be used to form W instructions for add/sub/mul/shl even when the 3610 // root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if 3611 // SimplifyDemandedBits has made it so some users see a sext_inreg and some 3612 // don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave 3613 // the add/sub/mul/shl to become non-W instructions. By checking the users we 3614 // may be able to use a W instruction and CSE with the other instruction if 3615 // this has happened. We could try to detect that the CSE opportunity exists 3616 // before doing this, but that would be more complicated. 
// \p Depth is the current recursion depth; the search conservatively returns
// false once it reaches SelectionDAG::MaxRecursionDepth. Every user must
// already be a selected machine node, otherwise the answer is false.
bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
                                        const unsigned Depth) const {
  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
          Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
          Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::AND ||
          Node->getOpcode() == ISD::OR || Node->getOpcode() == ISD::XOR ||
          Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
          isa<ConstantSDNode>(Node) || Depth != 0) &&
         "Unexpected opcode");

  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
  // the VT. Ensure the type is scalar to avoid wasting time on vectors.
  if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
    return false;

  for (SDUse &Use : Node->uses()) {
    SDNode *User = Use.getUser();
    // Users of this node should have already been instruction selected
    if (!User->isMachineOpcode())
      return false;

    // In the switch below, `break` means "this use only needs the low Bits
    // bits, keep checking the remaining uses"; `return false` rejects.
    // TODO: Add more opcodes?
    switch (User->getMachineOpcode()) {
    default:
      // Not a known scalar opcode: accept only if it is an RVV pseudo that
      // demands no more than Bits bits of this scalar operand.
      if (vectorPseudoHasAllNBitUsers(User, Use.getOperandNo(), Bits, TII))
        break;
      return false;
    case RISCV::ADDW:
    case RISCV::ADDIW:
    case RISCV::SUBW:
    case RISCV::MULW:
    case RISCV::SLLW:
    case RISCV::SLLIW:
    case RISCV::SRAW:
    case RISCV::SRAIW:
    case RISCV::SRLW:
    case RISCV::SRLIW:
    case RISCV::DIVW:
    case RISCV::DIVUW:
    case RISCV::REMW:
    case RISCV::REMUW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FMV_W_X:
    case RISCV::FCVT_H_W:
    case RISCV::FCVT_H_W_INX:
    case RISCV::FCVT_H_WU:
    case RISCV::FCVT_H_WU_INX:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_W_INX:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_S_WU_INX:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_W_INX:
    case RISCV::FCVT_D_WU:
    case RISCV::FCVT_D_WU_INX:
    case RISCV::TH_REVW:
    case RISCV::TH_SRRIW:
      // These users are accepted whenever at least 32 bits are provided.
      if (Bits >= 32)
        break;
      return false;
    case RISCV::SLL:
    case RISCV::SRA:
    case RISCV::SRL:
    case RISCV::ROL:
    case RISCV::ROR:
    case RISCV::BSET:
    case RISCV::BCLR:
    case RISCV::BINV:
      // Shift amount operands only use log2(Xlen) bits.
      if (Use.getOperandNo() == 1 && Bits >= Log2_32(Subtarget->getXLen()))
        break;
      return false;
    case RISCV::SLLI:
      // SLLI only uses the lower (XLen - ShAmt) bits.
      if (Bits >= Subtarget->getXLen() - User->getConstantOperandVal(1))
        break;
      return false;
    case RISCV::ANDI:
      // If every set bit of the mask lies within the low Bits bits, the ANDI
      // itself is an N-bit user. Otherwise fall back to checking the users of
      // the ANDI (shared RecCheck path below).
      if (Bits >= (unsigned)llvm::bit_width(User->getConstantOperandVal(1)))
        break;
      goto RecCheck;
    case RISCV::ORI: {
      // If every clear bit of the immediate lies within the low Bits bits,
      // the ORI itself is an N-bit user. Otherwise fall through and check the
      // users of the ORI.
      uint64_t Imm = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
      if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
        break;
      [[fallthrough]];
    }
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::XORI:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::SH1ADD:
    case RISCV::SH2ADD:
    case RISCV::SH3ADD:
    RecCheck:
      // For these users the answer depends on what their own users demand, so
      // recurse with the same bit count.
      if (hasAllNBitUsers(User, Bits, Depth + 1))
        break;
      return false;
    case RISCV::SRLI: {
      unsigned ShAmt = User->getConstantOperandVal(1);
      // If we are shifting right by less than Bits, and users don't demand any
      // bits that were shifted into [Bits-1:0], then we can consider this as an
      // N-Bit user.
      if (Bits > ShAmt && hasAllNBitUsers(User, Bits - ShAmt, Depth + 1))
        break;
      return false;
    }
    case RISCV::SEXT_B:
    case RISCV::PACKH:
      if (Bits >= 8)
        break;
      return false;
    case RISCV::SEXT_H:
    case RISCV::FMV_H_X:
    case RISCV::ZEXT_H_RV32:
    case RISCV::ZEXT_H_RV64:
    case RISCV::PACKW:
      if (Bits >= 16)
        break;
      return false;
    case RISCV::PACK:
      if (Bits >= (Subtarget->getXLen() / 2))
        break;
      return false;
    case RISCV::ADD_UW:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD_UW:
      // The first operand to add.uw/shXadd.uw is implicitly zero extended from
      // 32 bits.
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    // Stores: only a use as operand 0 is accepted, with at least as many bits
    // as the store width (8/16/32).
    case RISCV::SB:
      if (Use.getOperandNo() == 0 && Bits >= 8)
        break;
      return false;
    case RISCV::SH:
      if (Use.getOperandNo() == 0 && Bits >= 16)
        break;
      return false;
    case RISCV::SW:
      if (Use.getOperandNo() == 0 && Bits >= 32)
        break;
      return false;
    }
  }

  return true;
}

// Select a constant that can be represented as (sign_extend(imm5) << imm2).
// Shift amounts 0..3 are tried in increasing order and the first one for which
// the constant is a multiple of (1 << Shift) and the shifted-down value fits
// in a signed 5-bit immediate is used. On success \p Simm5 receives the
// shifted-down value and \p Shl2 the shift amount, both as target constants.
bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
                                        SDValue &Shl2) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (!C)
    return false;

  int64_t Offset = C->getSExtValue();
  for (unsigned Shift = 0; Shift < 4; Shift++) {
    if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) {
      EVT VT = N->getValueType(0);
      Simm5 = CurDAG->getSignedTargetConstant(Offset >> Shift, SDLoc(N), VT);
      Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), VT);
      return true;
    }
  }

  return false;
}

// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
// This selector never fails: it always sets \p VL and returns true.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && isUInt<5>(C->getZExtValue())) {
    // Small constant: keep it as an immediate.
    VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
                                   N->getValueType(0));
  } else if (C && C->isAllOnes()) {
    // Treat all ones as VLMax.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else if (isa<RegisterSDNode>(N) &&
             cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
    // All our VL operands use an operand that allows GPRNoX0 or an immediate
    // as the register class. Convert X0 to a special immediate to pass the
    // MachineVerifier. This is recognized specially by the vsetvli insertion
    // pass.
    VL = CurDAG->getSignedTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
                                         N->getValueType(0));
  } else {
    // Anything else is passed through unchanged.
    VL = N;
  }

  return true;
}

// Return the VMV_V_X_VL / VMV_S_X_VL node underlying \p N, or an empty SDValue
// if there is none. An INSERT_SUBVECTOR whose first operand is undef is looked
// through (its operand 1 is examined instead). The splat node is accepted only
// if its own operand 0 is undef.
static SDValue findVSplat(SDValue N) {
  if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
    if (!N.getOperand(0).isUndef())
      return SDValue();
    N = N.getOperand(1);
  }
  SDValue Splat = N;
  if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
       Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
      !Splat.getOperand(0).isUndef())
    return SDValue();
  assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
  return Splat;
}

// Match a splat found by findVSplat and return its operand 1 (the value being
// splatted) in \p SplatVal.
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  if (!Splat)
    return false;

  SplatVal = Splat.getOperand(1);
  return true;
}

// Shared implementation of the immediate-splat selectors.
//
// Matches a splat (see findVSplat) of a constant. The constant is first
// truncated/sign-extended to the vector element width, then passed to
// \p ValidateImm; if that returns false the match fails. When \p Decrement is
// true, 1 is subtracted from the value after validation. The result is
// returned in \p SplatVal as a signed XLenVT target constant.
static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
                                  SelectionDAG &DAG,
                                  const RISCVSubtarget &Subtarget,
                                  std::function<bool(int64_t)> ValidateImm,
                                  bool Decrement = false) {
  SDValue Splat = findVSplat(N);
  if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
    return false;

  const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
  assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
         "Unexpected splat operand type");

  // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
  // type is wider than the resulting vector element type: an implicit
  // truncation first takes place. Therefore, perform a manual
  // truncation/sign-extension in order to ignore any truncated bits and catch
  // any zero-extended immediate.
  // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
  // sign-extending to (XLenVT -1).
  APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);

  int64_t SplatImm = SplatConst.getSExtValue();

  if (!ValidateImm(SplatImm))
    return false;

  if (Decrement)
    SplatImm -= 1;

  SplatVal =
      DAG.getSignedTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
  return true;
}

// Match a constant splat whose value fits in a signed 5-bit immediate.
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
                               [](int64_t Imm) { return isInt<5>(Imm); });
}

// Match a constant splat in the range [-15, 16] and return the value minus
// one (which then fits in a signed 5-bit immediate).
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
      /*Decrement=*/true);
}

// Same range check as selectVSplatSimm5Plus1 ([-15, 16]), but the value is
// returned unmodified.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; },
      /*Decrement=*/false);
}

// Like selectVSplatSimm5Plus1 (range [-15, 16], value minus one returned), but
// additionally rejects a splat of zero.
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
                                                      SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [](int64_t Imm) {
        return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
      },
      /*Decrement=*/true);
}

// Match a constant splat whose value passes isUIntN(\p Bits, value), i.e. fits
// in an unsigned \p Bits-bit immediate.
bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
                                         SDValue &SplatVal) {
  return selectVSplatImmHelper(
      N, SplatVal, *CurDAG, *Subtarget,
      [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}

// Match a splat (see findVSplat) whose scalar operand is accepted by
// selectNegImm; \p SplatVal is whatever selectNegImm produces for it.
bool RISCVDAGToDAGISel::selectVSplatImm64Neg(SDValue N, SDValue &SplatVal) {
  SDValue Splat = findVSplat(N);
  return Splat && selectNegImm(Splat.getOperand(1), SplatVal);
}

// Match a splat reached through any number of single-use extend/truncate
// nodes, provided every such node has elements of at least 8 bits.
bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
  auto IsExtOrTrunc = [](SDValue N) {
    switch (N->getOpcode()) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    // There's no passthru on these _VL nodes so any VL/mask is ok, since any
    // inactive elements will be undef.
    case RISCVISD::TRUNCATE_VECTOR_VL:
    case RISCVISD::VSEXT_VL:
    case RISCVISD::VZEXT_VL:
      return true;
    default:
      return false;
    }
  };

  // We can have multiple nested nodes, so unravel them all if needed.
  while (IsExtOrTrunc(N)) {
    if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8)
      return false;
    N = N->getOperand(0);
  }

  return selectVSplat(N, SplatVal);
}

// Match a scalar FP value that is available as, or can be materialized as, an
// XLenVT integer, and return that integer in \p Imm.
bool RISCVDAGToDAGISel::selectScalarFPAsInt(SDValue N, SDValue &Imm) {
  // Allow bitcasts from XLenVT -> FP.
  if (N.getOpcode() == ISD::BITCAST &&
      N.getOperand(0).getValueType() == Subtarget->getXLenVT()) {
    Imm = N.getOperand(0);
    return true;
  }
  // Allow moves from XLenVT to FP.
  if (N.getOpcode() == RISCVISD::FMV_H_X ||
      N.getOpcode() == RISCVISD::FMV_W_X_RV64) {
    Imm = N.getOperand(0);
    return true;
  }

  // Otherwise, look for FP constants that can be materialized with scalar int.
  ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
  if (!CFP)
    return false;
  const APFloat &APF = CFP->getValueAPF();
  // td can handle +0.0 already.
  if (APF.isPosZero())
    return false;

  MVT VT = CFP->getSimpleValueType(0);

  MVT XLenVT = Subtarget->getXLenVT();
  // An f64 constant is not materialized this way on a 32-bit target; the only
  // such constant expected to reach this point is -0.0.
  if (VT == MVT::f64 && !Subtarget->is64Bit()) {
    assert(APF.isNegZero() && "Unexpected constant.");
    return false;
  }
  SDLoc DL(N);
  // Materialize the constant's bit pattern (sign-extended to 64 bits).
  Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
                  *Subtarget);
  return true;
}

// Match a constant that, once sign-extended from its low \p Width bits, fits
// in a signed 5-bit immediate. The sign-extended value is returned in \p Imm
// as an XLenVT target constant.
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
                                       SDValue &Imm) {
  if (auto *C = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = SignExtend64(C->getSExtValue(), Width);

    if (!isInt<5>(ImmVal))
      return false;

    Imm = CurDAG->getSignedTargetConstant(ImmVal, SDLoc(N),
                                          Subtarget->getXLenVT());
    return true;
  }

  return false;
}

// Try to remove sext.w if the input is a W instruction or can be made into
// a W instruction cheaply.
// Returns true if the uses of \p N were replaced.
bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
  // Look for the sext.w pattern, addiw rd, rs1, 0.
  if (N->getMachineOpcode() != RISCV::ADDIW ||
      !isNullConstant(N->getOperand(1)))
    return false;

  SDValue N0 = N->getOperand(0);
  if (!N0.isMachineOpcode())
    return false;

  switch (N0.getMachineOpcode()) {
  default:
    break;
  case RISCV::ADD:
  case RISCV::ADDI:
  case RISCV::SUB:
  case RISCV::MUL:
  case RISCV::SLLI: {
    // Convert sext.w+add/sub/mul to their W instructions. This will create
    // a new independent instruction. This improves latency.
    unsigned Opc;
    switch (N0.getMachineOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode!");
    case RISCV::ADD:  Opc = RISCV::ADDW;  break;
    case RISCV::ADDI: Opc = RISCV::ADDIW; break;
    case RISCV::SUB:  Opc = RISCV::SUBW;  break;
    case RISCV::MUL:  Opc = RISCV::MULW;  break;
    case RISCV::SLLI: Opc = RISCV::SLLIW; break;
    }

    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);

    // Shift amount needs to be uimm5. If it is not, leave the switch without
    // changing anything (the function then returns false).
    if (N0.getMachineOpcode() == RISCV::SLLI &&
        !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
      break;

    SDNode *Result =
        CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
                               N00, N01);
    ReplaceUses(N, Result);
    return true;
  }
  case RISCV::ADDW:
  case RISCV::ADDIW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::SLLIW:
  case RISCV::PACKW:
  case RISCV::TH_MULAW:
  case RISCV::TH_MULAH:
  case RISCV::TH_MULSW:
  case RISCV::TH_MULSH:
    // Nothing is done when the input has type i32.
    if (N0.getValueType() == MVT::i32)
      break;

    // Result is already sign extended just remove the sext.w.
    // NOTE: We only handle the nodes that are selected with hasAllWUsers.
    ReplaceUses(N, N0.getNode());
    return true;
  }

  return false;
}

// Return true if \p MaskOp is a selected machine node whose opcode is one of
// the PseudoVMSET_M_* pseudos.
static bool usesAllOnesMask(SDValue MaskOp) {
  const auto IsVMSet = [](unsigned Opc) {
    return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
           Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
           Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
           Opc == RISCV::PseudoVMSET_M_B8;
  };

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
}

// Return true if \p V is an IMPLICIT_DEF machine node, or a REG_SEQUENCE whose
// operands at odd indices (1, 3, 5, ...) are all, recursively, implicit defs.
// NOTE(review): the odd indices are presumably the value operands of the
// REG_SEQUENCE (each followed by its sub-register index) - confirm.
static bool isImplicitDef(SDValue V) {
  if (!V.isMachineOpcode())
    return false;
  if (V.getMachineOpcode() == TargetOpcode::REG_SEQUENCE) {
    for (unsigned I = 1; I < V.getNumOperands(); I += 2)
      if (!isImplicitDef(V.getOperand(I)))
        return false;
    return true;
  }
  return V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
}

// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions.
// Returns true if \p N was replaced by the unmasked pseudo.
bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
  const RISCV::RISCVMaskedPseudoInfo *I =
      RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
  if (!I)
    return false;

  unsigned MaskOpIdx = I->MaskOpIdx;
  if (!usesAllOnesMask(N->getOperand(MaskOpIdx)))
    return false;

  // There are two classes of pseudos in the table - compares and
  // everything else. See the comment on RISCVMaskedPseudo for details.
  const unsigned Opc = I->UnmaskedPseudo;
  const MCInstrDesc &MCID = TII->get(Opc);
  const bool HasPassthru = RISCVII::isFirstDefTiedToFirstUse(MCID);

  const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
  const bool MaskedHasPassthru = RISCVII::isFirstDefTiedToFirstUse(MaskedMCID);

  assert((RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ||
          !RISCVII::hasVecPolicyOp(MCID.TSFlags)) &&
         "Unmasked pseudo has policy but masked pseudo doesn't?");
  assert(RISCVII::hasVecPolicyOp(MCID.TSFlags) == HasPassthru &&
         "Unexpected pseudo structure");
  assert(!(HasPassthru && !MaskedHasPassthru) &&
         "Unmasked pseudo has passthru but masked pseudo doesn't?");

  // Build the operand list of the unmasked pseudo from the masked one.
  SmallVector<SDValue, 8> Ops;
  // Skip the passthru operand at index 0 if the unmasked pseudo doesn't have
  // one.
  bool ShouldSkip = !HasPassthru && MaskedHasPassthru;
  // The policy operand is dropped when only the masked pseudo has one. It is
  // taken to be the last operand, not counting a trailing chain.
  bool DropPolicy = !RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
                    RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags);
  bool HasChainOp =
      N->getOperand(N->getNumOperands() - 1).getValueType() == MVT::Other;
  unsigned LastOpNum = N->getNumOperands() - 1 - HasChainOp;
  for (unsigned I = ShouldSkip, E = N->getNumOperands(); I != E; I++) {
    // Skip the mask
    SDValue Op = N->getOperand(I);
    if (I == MaskOpIdx)
      continue;
    if (DropPolicy && I == LastOpNum)
      continue;
    Ops.push_back(Op);
  }

  MachineSDNode *Result =
      CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);

  // Carry over memory operands and node flags to the replacement.
  if (!N->memoperands_empty())
    CurDAG->setNodeMemRefs(Result, N->memoperands());

  Result->setFlags(N->getFlags());
  ReplaceUses(N, Result);

  return true;
}

/// If our passthru is an implicit_def, use noreg instead. This side
/// steps issues with MachineCSE not being able to CSE expressions with
/// IMPLICIT_DEF operands while preserving the semantic intent. See
/// pr64282 for context. Note that this transform is the last one
/// performed at ISEL DAG to DAG.
4163 bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() { 4164 bool MadeChange = false; 4165 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 4166 4167 while (Position != CurDAG->allnodes_begin()) { 4168 SDNode *N = &*--Position; 4169 if (N->use_empty() || !N->isMachineOpcode()) 4170 continue; 4171 4172 const unsigned Opc = N->getMachineOpcode(); 4173 if (!RISCVVPseudosTable::getPseudoInfo(Opc) || 4174 !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) || 4175 !isImplicitDef(N->getOperand(0))) 4176 continue; 4177 4178 SmallVector<SDValue> Ops; 4179 Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0))); 4180 for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) { 4181 SDValue Op = N->getOperand(I); 4182 Ops.push_back(Op); 4183 } 4184 4185 MachineSDNode *Result = 4186 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); 4187 Result->setFlags(N->getFlags()); 4188 CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands()); 4189 ReplaceUses(N, Result); 4190 MadeChange = true; 4191 } 4192 return MadeChange; 4193 } 4194 4195 4196 // This pass converts a legalized DAG into a RISCV-specific DAG, ready 4197 // for instruction scheduling. 4198 FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, 4199 CodeGenOptLevel OptLevel) { 4200 return new RISCVDAGToDAGISelLegacy(TM, OptLevel); 4201 } 4202 4203 char RISCVDAGToDAGISelLegacy::ID = 0; 4204 4205 RISCVDAGToDAGISelLegacy::RISCVDAGToDAGISelLegacy(RISCVTargetMachine &TM, 4206 CodeGenOptLevel OptLevel) 4207 : SelectionDAGISelLegacy( 4208 ID, std::make_unique<RISCVDAGToDAGISel>(TM, OptLevel)) {} 4209 4210 INITIALIZE_PASS(RISCVDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false) 4211