1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a 10 // selection DAG. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H 16 17 #include "NVPTX.h" 18 #include "llvm/CodeGen/SelectionDAG.h" 19 #include "llvm/CodeGen/TargetLowering.h" 20 21 namespace llvm { 22 namespace NVPTXISD { 23 enum NodeType : unsigned { 24 // Start the numbering from where ISD NodeType finishes. 25 FIRST_NUMBER = ISD::BUILTIN_OP_END, 26 Wrapper, 27 CALL, 28 RET_GLUE, 29 LOAD_PARAM, 30 DeclareParam, 31 DeclareScalarParam, 32 DeclareRetParam, 33 DeclareRet, 34 DeclareScalarRet, 35 PrintCall, 36 PrintConvergentCall, 37 PrintCallUni, 38 PrintConvergentCallUni, 39 CallArgBegin, 40 CallArg, 41 LastCallArg, 42 CallArgEnd, 43 CallVoid, 44 CallVal, 45 CallSymbol, 46 Prototype, 47 MoveParam, 48 PseudoUseParam, 49 RETURN, 50 CallSeqBegin, 51 CallSeqEnd, 52 CallPrototype, 53 ProxyReg, 54 FUN_SHFL_CLAMP, 55 FUN_SHFR_CLAMP, 56 MUL_WIDE_SIGNED, 57 MUL_WIDE_UNSIGNED, 58 IMAD, 59 SETP_F16X2, 60 SETP_BF16X2, 61 BFE, 62 BFI, 63 PRMT, 64 DYNAMIC_STACKALLOC, 65 Dummy, 66 67 LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, 68 LoadV4, 69 LDGV2, // LDG.v2 70 LDGV4, // LDG.v4 71 LDUV2, // LDU.v2 72 LDUV4, // LDU.v4 73 StoreV2, 74 StoreV4, 75 LoadParam, 76 LoadParamV2, 77 LoadParamV4, 78 StoreParam, 79 StoreParamV2, 80 StoreParamV4, 81 StoreParamS32, // to sext and store a <32bit value, not used currently 82 StoreParamU32, // to zext and store a <32bit value, not used currently 83 StoreRetval, 84 StoreRetvalV2, 85 StoreRetvalV4, 86 87 // Texture intrinsics 88 Tex1DFloatS32, 89 Tex1DFloatFloat, 90 Tex1DFloatFloatLevel, 91 Tex1DFloatFloatGrad, 92 Tex1DS32S32, 93 Tex1DS32Float, 94 Tex1DS32FloatLevel, 95 Tex1DS32FloatGrad, 96 Tex1DU32S32, 97 Tex1DU32Float, 98 Tex1DU32FloatLevel, 99 Tex1DU32FloatGrad, 100 Tex1DArrayFloatS32, 101 Tex1DArrayFloatFloat, 102 Tex1DArrayFloatFloatLevel, 103 Tex1DArrayFloatFloatGrad, 104 Tex1DArrayS32S32, 105 Tex1DArrayS32Float, 106 Tex1DArrayS32FloatLevel, 107 Tex1DArrayS32FloatGrad, 108 Tex1DArrayU32S32, 109 Tex1DArrayU32Float, 110 Tex1DArrayU32FloatLevel, 111 Tex1DArrayU32FloatGrad, 112 Tex2DFloatS32, 113 Tex2DFloatFloat, 114 Tex2DFloatFloatLevel, 115 Tex2DFloatFloatGrad, 116 Tex2DS32S32, 117 Tex2DS32Float, 118 Tex2DS32FloatLevel, 119 Tex2DS32FloatGrad, 120 Tex2DU32S32, 121 Tex2DU32Float, 122 Tex2DU32FloatLevel, 123 Tex2DU32FloatGrad, 124 Tex2DArrayFloatS32, 125 Tex2DArrayFloatFloat, 126 Tex2DArrayFloatFloatLevel, 127 Tex2DArrayFloatFloatGrad, 128 Tex2DArrayS32S32, 129 Tex2DArrayS32Float, 130 Tex2DArrayS32FloatLevel, 131 Tex2DArrayS32FloatGrad, 132 Tex2DArrayU32S32, 133 Tex2DArrayU32Float, 134 Tex2DArrayU32FloatLevel, 135 Tex2DArrayU32FloatGrad, 136 Tex3DFloatS32, 137 Tex3DFloatFloat, 138 Tex3DFloatFloatLevel, 139 Tex3DFloatFloatGrad, 140 Tex3DS32S32, 141 Tex3DS32Float, 142 Tex3DS32FloatLevel, 143 Tex3DS32FloatGrad, 144 Tex3DU32S32, 145 Tex3DU32Float, 146 Tex3DU32FloatLevel, 147 Tex3DU32FloatGrad, 148 TexCubeFloatFloat, 149 TexCubeFloatFloatLevel, 150 TexCubeS32Float, 151 TexCubeS32FloatLevel, 152 TexCubeU32Float, 153 TexCubeU32FloatLevel, 154 TexCubeArrayFloatFloat, 155 TexCubeArrayFloatFloatLevel, 156 TexCubeArrayS32Float, 157 TexCubeArrayS32FloatLevel, 158 TexCubeArrayU32Float, 159 TexCubeArrayU32FloatLevel, 160 Tld4R2DFloatFloat, 161 Tld4G2DFloatFloat, 162 Tld4B2DFloatFloat, 163 Tld4A2DFloatFloat, 164 Tld4R2DS64Float, 165 Tld4G2DS64Float, 166 Tld4B2DS64Float, 167 Tld4A2DS64Float, 168 Tld4R2DU64Float, 169 Tld4G2DU64Float, 170 Tld4B2DU64Float, 171 Tld4A2DU64Float, 172 TexUnified1DFloatS32, 173 TexUnified1DFloatFloat, 174 TexUnified1DFloatFloatLevel, 175 TexUnified1DFloatFloatGrad, 176 TexUnified1DS32S32, 177 TexUnified1DS32Float, 178 TexUnified1DS32FloatLevel, 179 TexUnified1DS32FloatGrad, 180 TexUnified1DU32S32, 181 TexUnified1DU32Float, 182 TexUnified1DU32FloatLevel, 183 TexUnified1DU32FloatGrad, 184 TexUnified1DArrayFloatS32, 185 TexUnified1DArrayFloatFloat, 186 TexUnified1DArrayFloatFloatLevel, 187 TexUnified1DArrayFloatFloatGrad, 188 TexUnified1DArrayS32S32, 189 TexUnified1DArrayS32Float, 190 TexUnified1DArrayS32FloatLevel, 191 TexUnified1DArrayS32FloatGrad, 192 TexUnified1DArrayU32S32, 193 TexUnified1DArrayU32Float, 194 TexUnified1DArrayU32FloatLevel, 195 TexUnified1DArrayU32FloatGrad, 196 TexUnified2DFloatS32, 197 TexUnified2DFloatFloat, 198 TexUnified2DFloatFloatLevel, 199 TexUnified2DFloatFloatGrad, 200 TexUnified2DS32S32, 201 TexUnified2DS32Float, 202 TexUnified2DS32FloatLevel, 203 TexUnified2DS32FloatGrad, 204 TexUnified2DU32S32, 205 TexUnified2DU32Float, 206 TexUnified2DU32FloatLevel, 207 TexUnified2DU32FloatGrad, 208 TexUnified2DArrayFloatS32, 209 TexUnified2DArrayFloatFloat, 210 TexUnified2DArrayFloatFloatLevel, 211 TexUnified2DArrayFloatFloatGrad, 212 TexUnified2DArrayS32S32, 213 TexUnified2DArrayS32Float, 214 TexUnified2DArrayS32FloatLevel, 215 TexUnified2DArrayS32FloatGrad, 216 TexUnified2DArrayU32S32, 217 TexUnified2DArrayU32Float, 218 TexUnified2DArrayU32FloatLevel, 219 TexUnified2DArrayU32FloatGrad, 220 TexUnified3DFloatS32, 221 TexUnified3DFloatFloat, 222 TexUnified3DFloatFloatLevel, 223 TexUnified3DFloatFloatGrad, 224 TexUnified3DS32S32, 225 TexUnified3DS32Float, 226 TexUnified3DS32FloatLevel, 227 TexUnified3DS32FloatGrad, 228 TexUnified3DU32S32, 229 TexUnified3DU32Float, 230 TexUnified3DU32FloatLevel, 231 TexUnified3DU32FloatGrad, 232 TexUnifiedCubeFloatFloat, 233 TexUnifiedCubeFloatFloatLevel, 234 TexUnifiedCubeS32Float, 235 TexUnifiedCubeS32FloatLevel, 236 TexUnifiedCubeU32Float, 237 TexUnifiedCubeU32FloatLevel, 238 TexUnifiedCubeArrayFloatFloat, 239 TexUnifiedCubeArrayFloatFloatLevel, 240 TexUnifiedCubeArrayS32Float, 241 TexUnifiedCubeArrayS32FloatLevel, 242 TexUnifiedCubeArrayU32Float, 243 TexUnifiedCubeArrayU32FloatLevel, 244 TexUnifiedCubeFloatFloatGrad, 245 TexUnifiedCubeS32FloatGrad, 246 TexUnifiedCubeU32FloatGrad, 247 TexUnifiedCubeArrayFloatFloatGrad, 248 TexUnifiedCubeArrayS32FloatGrad, 249 TexUnifiedCubeArrayU32FloatGrad, 250 Tld4UnifiedR2DFloatFloat, 251 Tld4UnifiedG2DFloatFloat, 252 Tld4UnifiedB2DFloatFloat, 253 Tld4UnifiedA2DFloatFloat, 254 Tld4UnifiedR2DS64Float, 255 Tld4UnifiedG2DS64Float, 256 Tld4UnifiedB2DS64Float, 257 Tld4UnifiedA2DS64Float, 258 Tld4UnifiedR2DU64Float, 259 Tld4UnifiedG2DU64Float, 260 Tld4UnifiedB2DU64Float, 261 Tld4UnifiedA2DU64Float, 262 263 // Surface intrinsics 264 Suld1DI8Clamp, 265 Suld1DI16Clamp, 266 Suld1DI32Clamp, 267 Suld1DI64Clamp, 268 Suld1DV2I8Clamp, 269 Suld1DV2I16Clamp, 270 Suld1DV2I32Clamp, 271 Suld1DV2I64Clamp, 272 Suld1DV4I8Clamp, 273 Suld1DV4I16Clamp, 274 Suld1DV4I32Clamp, 275 276 Suld1DArrayI8Clamp, 277 Suld1DArrayI16Clamp, 278 Suld1DArrayI32Clamp, 279 Suld1DArrayI64Clamp, 280 Suld1DArrayV2I8Clamp, 281 Suld1DArrayV2I16Clamp, 282 Suld1DArrayV2I32Clamp, 283 Suld1DArrayV2I64Clamp, 284 Suld1DArrayV4I8Clamp, 285 Suld1DArrayV4I16Clamp, 286 Suld1DArrayV4I32Clamp, 287 288 Suld2DI8Clamp, 289 Suld2DI16Clamp, 290 Suld2DI32Clamp, 291 Suld2DI64Clamp, 292 Suld2DV2I8Clamp, 293 Suld2DV2I16Clamp, 294 Suld2DV2I32Clamp, 295 Suld2DV2I64Clamp, 296 Suld2DV4I8Clamp, 297 Suld2DV4I16Clamp, 298 Suld2DV4I32Clamp, 299 300 Suld2DArrayI8Clamp, 301 Suld2DArrayI16Clamp, 302 Suld2DArrayI32Clamp, 303 Suld2DArrayI64Clamp, 304 Suld2DArrayV2I8Clamp, 305 Suld2DArrayV2I16Clamp, 306 Suld2DArrayV2I32Clamp, 307 Suld2DArrayV2I64Clamp, 308 Suld2DArrayV4I8Clamp, 309 Suld2DArrayV4I16Clamp, 310 Suld2DArrayV4I32Clamp, 311 312 Suld3DI8Clamp, 313 Suld3DI16Clamp, 314 Suld3DI32Clamp, 315 Suld3DI64Clamp, 316 Suld3DV2I8Clamp, 317 Suld3DV2I16Clamp, 318 Suld3DV2I32Clamp, 319 Suld3DV2I64Clamp, 320 Suld3DV4I8Clamp, 321 Suld3DV4I16Clamp, 322 Suld3DV4I32Clamp, 323 324 Suld1DI8Trap, 325 Suld1DI16Trap, 326 Suld1DI32Trap, 327 Suld1DI64Trap, 328 Suld1DV2I8Trap, 329 Suld1DV2I16Trap, 330 Suld1DV2I32Trap, 331 Suld1DV2I64Trap, 332 Suld1DV4I8Trap, 333 Suld1DV4I16Trap, 334 Suld1DV4I32Trap, 335 336 Suld1DArrayI8Trap, 337 Suld1DArrayI16Trap, 338 Suld1DArrayI32Trap, 339 Suld1DArrayI64Trap, 340 Suld1DArrayV2I8Trap, 341 Suld1DArrayV2I16Trap, 342 Suld1DArrayV2I32Trap, 343 Suld1DArrayV2I64Trap, 344 Suld1DArrayV4I8Trap, 345 Suld1DArrayV4I16Trap, 346 Suld1DArrayV4I32Trap, 347 348 Suld2DI8Trap, 349 Suld2DI16Trap, 350 Suld2DI32Trap, 351 Suld2DI64Trap, 352 Suld2DV2I8Trap, 353 Suld2DV2I16Trap, 354 Suld2DV2I32Trap, 355 Suld2DV2I64Trap, 356 Suld2DV4I8Trap, 357 Suld2DV4I16Trap, 358 Suld2DV4I32Trap, 359 360 Suld2DArrayI8Trap, 361 Suld2DArrayI16Trap, 362 Suld2DArrayI32Trap, 363 Suld2DArrayI64Trap, 364 Suld2DArrayV2I8Trap, 365 Suld2DArrayV2I16Trap, 366 Suld2DArrayV2I32Trap, 367 Suld2DArrayV2I64Trap, 368 Suld2DArrayV4I8Trap, 369 Suld2DArrayV4I16Trap, 370 Suld2DArrayV4I32Trap, 371 372 Suld3DI8Trap, 373 Suld3DI16Trap, 374 Suld3DI32Trap, 375 Suld3DI64Trap, 376 Suld3DV2I8Trap, 377 Suld3DV2I16Trap, 378 Suld3DV2I32Trap, 379 Suld3DV2I64Trap, 380 Suld3DV4I8Trap, 381 Suld3DV4I16Trap, 382 Suld3DV4I32Trap, 383 384 Suld1DI8Zero, 385 Suld1DI16Zero, 386 Suld1DI32Zero, 387 Suld1DI64Zero, 388 Suld1DV2I8Zero, 389 Suld1DV2I16Zero, 390 Suld1DV2I32Zero, 391 Suld1DV2I64Zero, 392 Suld1DV4I8Zero, 393 Suld1DV4I16Zero, 394 Suld1DV4I32Zero, 395 396 Suld1DArrayI8Zero, 397 Suld1DArrayI16Zero, 398 Suld1DArrayI32Zero, 399 Suld1DArrayI64Zero, 400 Suld1DArrayV2I8Zero, 401 Suld1DArrayV2I16Zero, 402 Suld1DArrayV2I32Zero, 403 Suld1DArrayV2I64Zero, 404 Suld1DArrayV4I8Zero, 405 Suld1DArrayV4I16Zero, 406 Suld1DArrayV4I32Zero, 407 408 Suld2DI8Zero, 409 Suld2DI16Zero, 410 Suld2DI32Zero, 411 Suld2DI64Zero, 412 Suld2DV2I8Zero, 413 Suld2DV2I16Zero, 414 Suld2DV2I32Zero, 415 Suld2DV2I64Zero, 416 Suld2DV4I8Zero, 417 Suld2DV4I16Zero, 418 Suld2DV4I32Zero, 419 420 Suld2DArrayI8Zero, 421 Suld2DArrayI16Zero, 422 Suld2DArrayI32Zero, 423 Suld2DArrayI64Zero, 424 Suld2DArrayV2I8Zero, 425 Suld2DArrayV2I16Zero, 426 Suld2DArrayV2I32Zero, 427 Suld2DArrayV2I64Zero, 428 Suld2DArrayV4I8Zero, 429 Suld2DArrayV4I16Zero, 430 Suld2DArrayV4I32Zero, 431 432 Suld3DI8Zero, 433 Suld3DI16Zero, 434 Suld3DI32Zero, 435 Suld3DI64Zero, 436 Suld3DV2I8Zero, 437 Suld3DV2I16Zero, 438 Suld3DV2I32Zero, 439 Suld3DV2I64Zero, 440 Suld3DV4I8Zero, 441 Suld3DV4I16Zero, 442 Suld3DV4I32Zero 443 }; 444 } 445 446 class NVPTXSubtarget; 447 448 //===--------------------------------------------------------------------===// 449 // TargetLowering Implementation 450 //===--------------------------------------------------------------------===// 451 class NVPTXTargetLowering : public TargetLowering { 452 public: 453 explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM, 454 const NVPTXSubtarget &STI); 455 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 456 457 SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; 458 459 const char *getTargetNodeName(unsigned Opcode) const override; 460 461 bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, 462 MachineFunction &MF, 463 unsigned Intrinsic) const override; 464 465 Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx, 466 const DataLayout &DL) const; 467 468 /// getFunctionParamOptimizedAlign - since function arguments are passed via 469 /// .param space, we may want to increase their alignment in a way that 470 /// ensures that we can effectively vectorize their loads & stores. We can 471 /// increase alignment only if the function has internal or has private 472 /// linkage as for other linkage types callers may already rely on default 473 /// alignment. To allow using 128-bit vectorized loads/stores, this function 474 /// ensures that alignment is 16 or greater. 475 Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy, 476 const DataLayout &DL) const; 477 478 /// Helper for computing alignment of a device function byval parameter. 479 Align getFunctionByValParamAlign(const Function *F, Type *ArgTy, 480 Align InitialAlign, 481 const DataLayout &DL) const; 482 483 // Helper for getting a function parameter name. Name is composed from 484 // its index and the function name. Negative index corresponds to special 485 // parameter (unsized array) used for passing variable arguments. 486 std::string getParamName(const Function *F, int Idx) const; 487 488 /// isLegalAddressingMode - Return true if the addressing mode represented 489 /// by AM is legal for this target, for a load/store of the specified type 490 /// Used to guide target specific optimizations, like loop strength 491 /// reduction (LoopStrengthReduce.cpp) and memory optimization for 492 /// address mode (CodeGenPrepare.cpp) 493 bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, 494 unsigned AS, 495 Instruction *I = nullptr) const override; 496 isTruncateFree(Type * SrcTy,Type * DstTy)497 bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { 498 // Truncating 64-bit to 32-bit is free in SASS. 499 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) 500 return false; 501 return SrcTy->getPrimitiveSizeInBits() == 64 && 502 DstTy->getPrimitiveSizeInBits() == 32; 503 } 504 getSetCCResultType(const DataLayout & DL,LLVMContext & Ctx,EVT VT)505 EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, 506 EVT VT) const override { 507 if (VT.isVector()) 508 return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); 509 return MVT::i1; 510 } 511 512 ConstraintType getConstraintType(StringRef Constraint) const override; 513 std::pair<unsigned, const TargetRegisterClass *> 514 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, 515 StringRef Constraint, MVT VT) const override; 516 517 SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, 518 bool isVarArg, 519 const SmallVectorImpl<ISD::InputArg> &Ins, 520 const SDLoc &dl, SelectionDAG &DAG, 521 SmallVectorImpl<SDValue> &InVals) const override; 522 523 SDValue LowerCall(CallLoweringInfo &CLI, 524 SmallVectorImpl<SDValue> &InVals) const override; 525 526 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; 527 528 std::string 529 getPrototype(const DataLayout &DL, Type *, const ArgListTy &, 530 const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment, 531 std::optional<std::pair<unsigned, const APInt &>> VAInfo, 532 const CallBase &CB, unsigned UniqueCallSite) const; 533 534 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 535 const SmallVectorImpl<ISD::OutputArg> &Outs, 536 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, 537 SelectionDAG &DAG) const override; 538 539 void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, 540 std::vector<SDValue> &Ops, 541 SelectionDAG &DAG) const override; 542 543 const NVPTXTargetMachine *nvTM; 544 545 // PTX always uses 32-bit shift amounts getScalarShiftAmountTy(const DataLayout &,EVT)546 MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { 547 return MVT::i32; 548 } 549 550 TargetLoweringBase::LegalizeTypeAction 551 getPreferredVectorAction(MVT VT) const override; 552 553 // Get the degree of precision we want from 32-bit floating point division 554 // operations. 555 // 556 // 0 - Use ptx div.approx 557 // 1 - Use ptx.div.full (approximate, but less so than div.approx) 558 // 2 - Use IEEE-compliant div instructions, if available. 559 int getDivF32Level() const; 560 561 // Get whether we should use a precise or approximate 32-bit floating point 562 // sqrt instruction. 563 bool usePrecSqrtF32() const; 564 565 // Get whether we should use instructions that flush floating-point denormals 566 // to sign-preserving zero. 567 bool useF32FTZ(const MachineFunction &MF) const; 568 569 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 570 int &ExtraSteps, bool &UseOneConst, 571 bool Reciprocal) const override; 572 combineRepeatedFPDivisors()573 unsigned combineRepeatedFPDivisors() const override { return 2; } 574 575 bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const; 576 bool allowUnsafeFPMath(MachineFunction &MF) const; 577 isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT)578 bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, 579 EVT) const override { 580 return true; 581 } 582 enableAggressiveFMAFusion(EVT VT)583 bool enableAggressiveFMAFusion(EVT VT) const override { return true; } 584 585 // The default is to transform llvm.ctlz(x, false) (where false indicates that 586 // x == 0 is not undefined behavior) into a branch that checks whether x is 0 587 // and avoids calling ctlz in that case. We have a dedicated ctlz 588 // instruction, so we say that ctlz is cheap to speculate. isCheapToSpeculateCtlz(Type * Ty)589 bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; } 590 shouldCastAtomicLoadInIR(LoadInst * LI)591 AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override { 592 return AtomicExpansionKind::None; 593 } 594 shouldCastAtomicStoreInIR(StoreInst * SI)595 AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override { 596 return AtomicExpansionKind::None; 597 } 598 599 AtomicExpansionKind 600 shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; 601 aggressivelyPreferBuildVectorSources(EVT VecVT)602 bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override { 603 // There's rarely any point of packing something into a vector type if we 604 // already have the source data. 605 return true; 606 } 607 608 private: 609 const NVPTXSubtarget &STI; // cache the subtarget here 610 SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; 611 612 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 613 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 614 SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 615 SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; 616 SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; 617 618 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; 619 SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const; 620 SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const; 621 622 SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 623 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 624 625 SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; 626 SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; 627 628 SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; 629 SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const; 630 631 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 632 SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; 633 SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; 634 635 SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; 636 SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; 637 638 SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const; 639 640 SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; 641 SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; 642 643 SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const; 644 unsigned getNumRegisters(LLVMContext &Context, EVT VT, 645 std::optional<MVT> RegisterVT) const override; 646 bool 647 splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, 648 SDValue *Parts, unsigned NumParts, MVT PartVT, 649 std::optional<CallingConv::ID> CC) const override; 650 651 void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, 652 SelectionDAG &DAG) const override; 653 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 654 655 Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx, 656 const DataLayout &DL) const; 657 }; 658 659 } // namespace llvm 660 661 #endif 662