xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h (revision 56b17de1e8360fe131d425de20b5e75ff3ea897c)
1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 
17 #include "NVPTX.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 
21 namespace llvm {
22 namespace NVPTXISD {
23 enum NodeType : unsigned {
     // Opcode values declared in the NVPTXISD namespace; the underlying type is
     // unsigned. Values are implied by declaration order, so the order of the
     // enumerators below is significant.
24   // Start the numbering from where ISD NodeType finishes.
25   FIRST_NUMBER = ISD::BUILTIN_OP_END,
26   Wrapper,
27   CALL,
28   RET_GLUE,
29   LOAD_PARAM,
30   DeclareParam,
31   DeclareScalarParam,
32   DeclareRetParam,
33   DeclareRet,
34   DeclareScalarRet,
35   PrintCall,
36   PrintConvergentCall,
37   PrintCallUni,
38   PrintConvergentCallUni,
39   CallArgBegin,
40   CallArg,
41   LastCallArg,
42   CallArgEnd,
43   CallVoid,
44   CallVal,
45   CallSymbol,
46   Prototype,
47   MoveParam,
48   PseudoUseParam,
49   RETURN,
50   CallSeqBegin,
51   CallSeqEnd,
52   CallPrototype,
53   ProxyReg,
54   FUN_SHFL_CLAMP,
55   FUN_SHFR_CLAMP,
56   MUL_WIDE_SIGNED,
57   MUL_WIDE_UNSIGNED,
58   IMAD,
59   SETP_F16X2,
60   SETP_BF16X2,
61   BFE,
62   BFI,
63   PRMT,
64   DYNAMIC_STACKALLOC,
65   Dummy,
66 
     // Everything from LoadV2 onward is numbered starting at
     // ISD::FIRST_TARGET_MEMORY_OPCODE instead of continuing after Dummy.
     // NOTE(review): presumably this second range holds the opcodes that access
     // memory -- confirm against the ISD opcode definitions.
67   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
68   LoadV4,
69   LDGV2, // LDG.v2
70   LDGV4, // LDG.v4
71   LDUV2, // LDU.v2
72   LDUV4, // LDU.v4
73   StoreV2,
74   StoreV4,
75   LoadParam,
76   LoadParamV2,
77   LoadParamV4,
78   StoreParam,
79   StoreParamV2,
80   StoreParamV4,
81   StoreParamS32, // to sext and store a <32bit value, not used currently
82   StoreParamU32, // to zext and store a <32bit value, not used currently
83   StoreRetval,
84   StoreRetvalV2,
85   StoreRetvalV4,
86 
87   // Texture intrinsics
     // NOTE(review): the Tex* names appear to encode the texture geometry, then
     // two element types, then an optional Level/Grad suffix; which type is the
     // result and which is the coordinate is not visible here -- confirm.
88   Tex1DFloatS32,
89   Tex1DFloatFloat,
90   Tex1DFloatFloatLevel,
91   Tex1DFloatFloatGrad,
92   Tex1DS32S32,
93   Tex1DS32Float,
94   Tex1DS32FloatLevel,
95   Tex1DS32FloatGrad,
96   Tex1DU32S32,
97   Tex1DU32Float,
98   Tex1DU32FloatLevel,
99   Tex1DU32FloatGrad,
100   Tex1DArrayFloatS32,
101   Tex1DArrayFloatFloat,
102   Tex1DArrayFloatFloatLevel,
103   Tex1DArrayFloatFloatGrad,
104   Tex1DArrayS32S32,
105   Tex1DArrayS32Float,
106   Tex1DArrayS32FloatLevel,
107   Tex1DArrayS32FloatGrad,
108   Tex1DArrayU32S32,
109   Tex1DArrayU32Float,
110   Tex1DArrayU32FloatLevel,
111   Tex1DArrayU32FloatGrad,
112   Tex2DFloatS32,
113   Tex2DFloatFloat,
114   Tex2DFloatFloatLevel,
115   Tex2DFloatFloatGrad,
116   Tex2DS32S32,
117   Tex2DS32Float,
118   Tex2DS32FloatLevel,
119   Tex2DS32FloatGrad,
120   Tex2DU32S32,
121   Tex2DU32Float,
122   Tex2DU32FloatLevel,
123   Tex2DU32FloatGrad,
124   Tex2DArrayFloatS32,
125   Tex2DArrayFloatFloat,
126   Tex2DArrayFloatFloatLevel,
127   Tex2DArrayFloatFloatGrad,
128   Tex2DArrayS32S32,
129   Tex2DArrayS32Float,
130   Tex2DArrayS32FloatLevel,
131   Tex2DArrayS32FloatGrad,
132   Tex2DArrayU32S32,
133   Tex2DArrayU32Float,
134   Tex2DArrayU32FloatLevel,
135   Tex2DArrayU32FloatGrad,
136   Tex3DFloatS32,
137   Tex3DFloatFloat,
138   Tex3DFloatFloatLevel,
139   Tex3DFloatFloatGrad,
140   Tex3DS32S32,
141   Tex3DS32Float,
142   Tex3DS32FloatLevel,
143   Tex3DS32FloatGrad,
144   Tex3DU32S32,
145   Tex3DU32Float,
146   Tex3DU32FloatLevel,
147   Tex3DU32FloatGrad,
148   TexCubeFloatFloat,
149   TexCubeFloatFloatLevel,
150   TexCubeS32Float,
151   TexCubeS32FloatLevel,
152   TexCubeU32Float,
153   TexCubeU32FloatLevel,
154   TexCubeArrayFloatFloat,
155   TexCubeArrayFloatFloatLevel,
156   TexCubeArrayS32Float,
157   TexCubeArrayS32FloatLevel,
158   TexCubeArrayU32Float,
159   TexCubeArrayU32FloatLevel,
160   Tld4R2DFloatFloat,
161   Tld4G2DFloatFloat,
162   Tld4B2DFloatFloat,
163   Tld4A2DFloatFloat,
164   Tld4R2DS64Float,
165   Tld4G2DS64Float,
166   Tld4B2DS64Float,
167   Tld4A2DS64Float,
168   Tld4R2DU64Float,
169   Tld4G2DU64Float,
170   Tld4B2DU64Float,
171   Tld4A2DU64Float,
172   TexUnified1DFloatS32,
173   TexUnified1DFloatFloat,
174   TexUnified1DFloatFloatLevel,
175   TexUnified1DFloatFloatGrad,
176   TexUnified1DS32S32,
177   TexUnified1DS32Float,
178   TexUnified1DS32FloatLevel,
179   TexUnified1DS32FloatGrad,
180   TexUnified1DU32S32,
181   TexUnified1DU32Float,
182   TexUnified1DU32FloatLevel,
183   TexUnified1DU32FloatGrad,
184   TexUnified1DArrayFloatS32,
185   TexUnified1DArrayFloatFloat,
186   TexUnified1DArrayFloatFloatLevel,
187   TexUnified1DArrayFloatFloatGrad,
188   TexUnified1DArrayS32S32,
189   TexUnified1DArrayS32Float,
190   TexUnified1DArrayS32FloatLevel,
191   TexUnified1DArrayS32FloatGrad,
192   TexUnified1DArrayU32S32,
193   TexUnified1DArrayU32Float,
194   TexUnified1DArrayU32FloatLevel,
195   TexUnified1DArrayU32FloatGrad,
196   TexUnified2DFloatS32,
197   TexUnified2DFloatFloat,
198   TexUnified2DFloatFloatLevel,
199   TexUnified2DFloatFloatGrad,
200   TexUnified2DS32S32,
201   TexUnified2DS32Float,
202   TexUnified2DS32FloatLevel,
203   TexUnified2DS32FloatGrad,
204   TexUnified2DU32S32,
205   TexUnified2DU32Float,
206   TexUnified2DU32FloatLevel,
207   TexUnified2DU32FloatGrad,
208   TexUnified2DArrayFloatS32,
209   TexUnified2DArrayFloatFloat,
210   TexUnified2DArrayFloatFloatLevel,
211   TexUnified2DArrayFloatFloatGrad,
212   TexUnified2DArrayS32S32,
213   TexUnified2DArrayS32Float,
214   TexUnified2DArrayS32FloatLevel,
215   TexUnified2DArrayS32FloatGrad,
216   TexUnified2DArrayU32S32,
217   TexUnified2DArrayU32Float,
218   TexUnified2DArrayU32FloatLevel,
219   TexUnified2DArrayU32FloatGrad,
220   TexUnified3DFloatS32,
221   TexUnified3DFloatFloat,
222   TexUnified3DFloatFloatLevel,
223   TexUnified3DFloatFloatGrad,
224   TexUnified3DS32S32,
225   TexUnified3DS32Float,
226   TexUnified3DS32FloatLevel,
227   TexUnified3DS32FloatGrad,
228   TexUnified3DU32S32,
229   TexUnified3DU32Float,
230   TexUnified3DU32FloatLevel,
231   TexUnified3DU32FloatGrad,
232   TexUnifiedCubeFloatFloat,
233   TexUnifiedCubeFloatFloatLevel,
234   TexUnifiedCubeS32Float,
235   TexUnifiedCubeS32FloatLevel,
236   TexUnifiedCubeU32Float,
237   TexUnifiedCubeU32FloatLevel,
238   TexUnifiedCubeArrayFloatFloat,
239   TexUnifiedCubeArrayFloatFloatLevel,
240   TexUnifiedCubeArrayS32Float,
241   TexUnifiedCubeArrayS32FloatLevel,
242   TexUnifiedCubeArrayU32Float,
243   TexUnifiedCubeArrayU32FloatLevel,
244   TexUnifiedCubeFloatFloatGrad,
245   TexUnifiedCubeS32FloatGrad,
246   TexUnifiedCubeU32FloatGrad,
247   TexUnifiedCubeArrayFloatFloatGrad,
248   TexUnifiedCubeArrayS32FloatGrad,
249   TexUnifiedCubeArrayU32FloatGrad,
250   Tld4UnifiedR2DFloatFloat,
251   Tld4UnifiedG2DFloatFloat,
252   Tld4UnifiedB2DFloatFloat,
253   Tld4UnifiedA2DFloatFloat,
254   Tld4UnifiedR2DS64Float,
255   Tld4UnifiedG2DS64Float,
256   Tld4UnifiedB2DS64Float,
257   Tld4UnifiedA2DS64Float,
258   Tld4UnifiedR2DU64Float,
259   Tld4UnifiedG2DU64Float,
260   Tld4UnifiedB2DU64Float,
261   Tld4UnifiedA2DU64Float,
262 
263   // Surface intrinsics
      // The Suld* opcodes are listed in three runs distinguished by their name
      // suffix: Clamp, then Trap, then Zero. Each run covers 1D, 1DArray, 2D,
      // 2DArray and 3D, and each of those has the same eleven variants (I8, I16,
      // I32, I64, V2I8, V2I16, V2I32, V2I64, V4I8, V4I16, V4I32).
      // NOTE(review): the suffix presumably names how out-of-range accesses are
      // handled by the surface load -- confirm against the PTX documentation.
264   Suld1DI8Clamp,
265   Suld1DI16Clamp,
266   Suld1DI32Clamp,
267   Suld1DI64Clamp,
268   Suld1DV2I8Clamp,
269   Suld1DV2I16Clamp,
270   Suld1DV2I32Clamp,
271   Suld1DV2I64Clamp,
272   Suld1DV4I8Clamp,
273   Suld1DV4I16Clamp,
274   Suld1DV4I32Clamp,
275 
276   Suld1DArrayI8Clamp,
277   Suld1DArrayI16Clamp,
278   Suld1DArrayI32Clamp,
279   Suld1DArrayI64Clamp,
280   Suld1DArrayV2I8Clamp,
281   Suld1DArrayV2I16Clamp,
282   Suld1DArrayV2I32Clamp,
283   Suld1DArrayV2I64Clamp,
284   Suld1DArrayV4I8Clamp,
285   Suld1DArrayV4I16Clamp,
286   Suld1DArrayV4I32Clamp,
287 
288   Suld2DI8Clamp,
289   Suld2DI16Clamp,
290   Suld2DI32Clamp,
291   Suld2DI64Clamp,
292   Suld2DV2I8Clamp,
293   Suld2DV2I16Clamp,
294   Suld2DV2I32Clamp,
295   Suld2DV2I64Clamp,
296   Suld2DV4I8Clamp,
297   Suld2DV4I16Clamp,
298   Suld2DV4I32Clamp,
299 
300   Suld2DArrayI8Clamp,
301   Suld2DArrayI16Clamp,
302   Suld2DArrayI32Clamp,
303   Suld2DArrayI64Clamp,
304   Suld2DArrayV2I8Clamp,
305   Suld2DArrayV2I16Clamp,
306   Suld2DArrayV2I32Clamp,
307   Suld2DArrayV2I64Clamp,
308   Suld2DArrayV4I8Clamp,
309   Suld2DArrayV4I16Clamp,
310   Suld2DArrayV4I32Clamp,
311 
312   Suld3DI8Clamp,
313   Suld3DI16Clamp,
314   Suld3DI32Clamp,
315   Suld3DI64Clamp,
316   Suld3DV2I8Clamp,
317   Suld3DV2I16Clamp,
318   Suld3DV2I32Clamp,
319   Suld3DV2I64Clamp,
320   Suld3DV4I8Clamp,
321   Suld3DV4I16Clamp,
322   Suld3DV4I32Clamp,
323 
324   Suld1DI8Trap,
325   Suld1DI16Trap,
326   Suld1DI32Trap,
327   Suld1DI64Trap,
328   Suld1DV2I8Trap,
329   Suld1DV2I16Trap,
330   Suld1DV2I32Trap,
331   Suld1DV2I64Trap,
332   Suld1DV4I8Trap,
333   Suld1DV4I16Trap,
334   Suld1DV4I32Trap,
335 
336   Suld1DArrayI8Trap,
337   Suld1DArrayI16Trap,
338   Suld1DArrayI32Trap,
339   Suld1DArrayI64Trap,
340   Suld1DArrayV2I8Trap,
341   Suld1DArrayV2I16Trap,
342   Suld1DArrayV2I32Trap,
343   Suld1DArrayV2I64Trap,
344   Suld1DArrayV4I8Trap,
345   Suld1DArrayV4I16Trap,
346   Suld1DArrayV4I32Trap,
347 
348   Suld2DI8Trap,
349   Suld2DI16Trap,
350   Suld2DI32Trap,
351   Suld2DI64Trap,
352   Suld2DV2I8Trap,
353   Suld2DV2I16Trap,
354   Suld2DV2I32Trap,
355   Suld2DV2I64Trap,
356   Suld2DV4I8Trap,
357   Suld2DV4I16Trap,
358   Suld2DV4I32Trap,
359 
360   Suld2DArrayI8Trap,
361   Suld2DArrayI16Trap,
362   Suld2DArrayI32Trap,
363   Suld2DArrayI64Trap,
364   Suld2DArrayV2I8Trap,
365   Suld2DArrayV2I16Trap,
366   Suld2DArrayV2I32Trap,
367   Suld2DArrayV2I64Trap,
368   Suld2DArrayV4I8Trap,
369   Suld2DArrayV4I16Trap,
370   Suld2DArrayV4I32Trap,
371 
372   Suld3DI8Trap,
373   Suld3DI16Trap,
374   Suld3DI32Trap,
375   Suld3DI64Trap,
376   Suld3DV2I8Trap,
377   Suld3DV2I16Trap,
378   Suld3DV2I32Trap,
379   Suld3DV2I64Trap,
380   Suld3DV4I8Trap,
381   Suld3DV4I16Trap,
382   Suld3DV4I32Trap,
383 
384   Suld1DI8Zero,
385   Suld1DI16Zero,
386   Suld1DI32Zero,
387   Suld1DI64Zero,
388   Suld1DV2I8Zero,
389   Suld1DV2I16Zero,
390   Suld1DV2I32Zero,
391   Suld1DV2I64Zero,
392   Suld1DV4I8Zero,
393   Suld1DV4I16Zero,
394   Suld1DV4I32Zero,
395 
396   Suld1DArrayI8Zero,
397   Suld1DArrayI16Zero,
398   Suld1DArrayI32Zero,
399   Suld1DArrayI64Zero,
400   Suld1DArrayV2I8Zero,
401   Suld1DArrayV2I16Zero,
402   Suld1DArrayV2I32Zero,
403   Suld1DArrayV2I64Zero,
404   Suld1DArrayV4I8Zero,
405   Suld1DArrayV4I16Zero,
406   Suld1DArrayV4I32Zero,
407 
408   Suld2DI8Zero,
409   Suld2DI16Zero,
410   Suld2DI32Zero,
411   Suld2DI64Zero,
412   Suld2DV2I8Zero,
413   Suld2DV2I16Zero,
414   Suld2DV2I32Zero,
415   Suld2DV2I64Zero,
416   Suld2DV4I8Zero,
417   Suld2DV4I16Zero,
418   Suld2DV4I32Zero,
419 
420   Suld2DArrayI8Zero,
421   Suld2DArrayI16Zero,
422   Suld2DArrayI32Zero,
423   Suld2DArrayI64Zero,
424   Suld2DArrayV2I8Zero,
425   Suld2DArrayV2I16Zero,
426   Suld2DArrayV2I32Zero,
427   Suld2DArrayV2I64Zero,
428   Suld2DArrayV4I8Zero,
429   Suld2DArrayV4I16Zero,
430   Suld2DArrayV4I32Zero,
431 
432   Suld3DI8Zero,
433   Suld3DI16Zero,
434   Suld3DI32Zero,
435   Suld3DI64Zero,
436   Suld3DV2I8Zero,
437   Suld3DV2I16Zero,
438   Suld3DV2I32Zero,
439   Suld3DV2I64Zero,
440   Suld3DV4I8Zero,
441   Suld3DV4I16Zero,
442   Suld3DV4I32Zero
443 };
444 }
445 
446 class NVPTXSubtarget;
447 
448 //===--------------------------------------------------------------------===//
449 // TargetLowering Implementation
450 //===--------------------------------------------------------------------===//
451 class NVPTXTargetLowering : public TargetLowering {
      // TargetLowering subclass for NVPTX. Most members are declared here only;
      // the few bodies defined inline are documented where they appear.
452 public:
      // Constructed from the NVPTX target machine and the subtarget.
453   explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
454                                const NVPTXSubtarget &STI);
455   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
456 
457   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
458 
      // Returns a printable name for a target node opcode.
      // NOTE(review): presumably covers the NVPTXISD::NodeType values -- confirm
      // in the implementation file.
459   const char *getTargetNodeName(unsigned Opcode) const override;
460 
461   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
462                           MachineFunction &MF,
463                           unsigned Intrinsic) const override;
464 
465   Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx,
466                                      const DataLayout &DL) const;
467 
468   /// getFunctionParamOptimizedAlign - since function arguments are passed via
469   /// .param space, we may want to increase their alignment in a way that
470   /// ensures that we can effectively vectorize their loads & stores. We can
471   /// increase alignment only if the function has internal or private
472   /// linkage, because for other linkage types callers may already rely on the
473   /// default alignment. To allow using 128-bit vectorized loads/stores, this
474   /// function ensures that alignment is 16 or greater.
475   Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy,
476                                        const DataLayout &DL) const;
477 
478   /// Helper for computing alignment of a device function byval parameter.
479   Align getFunctionByValParamAlign(const Function *F, Type *ArgTy,
480                                    Align InitialAlign,
481                                    const DataLayout &DL) const;
482 
483   // Helper for getting a function parameter name. Name is composed from
484   // its index and the function name. Negative index corresponds to special
485   // parameter (unsized array) used for passing variable arguments.
486   std::string getParamName(const Function *F, int Idx) const;
487 
488   /// isLegalAddressingMode - Return true if the addressing mode represented
489   /// by AM is legal for this target, for a load/store of the specified type.
490   /// Used to guide target specific optimizations, like loop strength
491   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
492   /// address mode (CodeGenPrepare.cpp)
493   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
494                              unsigned AS,
495                              Instruction *I = nullptr) const override;
496 
      /// Reports a truncation as free only when both types are integer types,
      /// the source is exactly 64 bits wide and the destination is exactly 32
      /// bits wide; every other combination returns false.
497   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
498     // Truncating 64-bit to 32-bit is free in SASS.
499     if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
500       return false;
501     return SrcTy->getPrimitiveSizeInBits() == 64 &&
502            DstTy->getPrimitiveSizeInBits() == 32;
503   }
504 
      /// The result type of a comparison is i1 for a non-vector VT, and a vector
      /// of i1 with the same number of elements as VT when VT is a vector. The
      /// DataLayout argument is not consulted.
505   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
506                          EVT VT) const override {
507     if (VT.isVector())
508       return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
509     return MVT::i1;
510   }
511 
512   ConstraintType getConstraintType(StringRef Constraint) const override;
513   std::pair<unsigned, const TargetRegisterClass *>
514   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
515                                StringRef Constraint, MVT VT) const override;
516 
517   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
518                                bool isVarArg,
519                                const SmallVectorImpl<ISD::InputArg> &Ins,
520                                const SDLoc &dl, SelectionDAG &DAG,
521                                SmallVectorImpl<SDValue> &InVals) const override;
522 
523   SDValue LowerCall(CallLoweringInfo &CLI,
524                     SmallVectorImpl<SDValue> &InVals) const override;
525 
526   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
527 
      // Builds a prototype string for the call CB.
      // NOTE(review): the exact text format and the role of the unnamed
      // parameters are defined in the implementation file -- confirm there.
528   std::string
529   getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
530                const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
531                std::optional<std::pair<unsigned, const APInt &>> VAInfo,
532                const CallBase &CB, unsigned UniqueCallSite) const;
533 
534   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
535                       const SmallVectorImpl<ISD::OutputArg> &Outs,
536                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
537                       SelectionDAG &DAG) const override;
538 
539   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
540                                     std::vector<SDValue> &Ops,
541                                     SelectionDAG &DAG) const override;
542 
      // Public, non-owning pointer to the NVPTX target machine.
      // NOTE(review): presumably initialised from the constructor's TM argument
      // -- confirm in the implementation file.
543   const NVPTXTargetMachine *nvTM;
544 
545   // PTX always uses 32-bit shift amounts
546   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
547     return MVT::i32;
548   }
549 
550   TargetLoweringBase::LegalizeTypeAction
551   getPreferredVectorAction(MVT VT) const override;
552 
553   // Get the degree of precision we want from 32-bit floating point division
554   // operations.
555   //
556   //  0 - Use ptx div.approx
557   //  1 - Use ptx div.full (approximate, but less so than div.approx)
558   //  2 - Use IEEE-compliant div instructions, if available.
559   int getDivF32Level() const;
560 
561   // Get whether we should use a precise or approximate 32-bit floating point
562   // sqrt instruction.
563   bool usePrecSqrtF32() const;
564 
565   // Get whether we should use instructions that flush floating-point denormals
566   // to sign-preserving zero.
567   bool useF32FTZ(const MachineFunction &MF) const;
568 
569   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
570                           int &ExtraSteps, bool &UseOneConst,
571                           bool Reciprocal) const override;
572 
      // Always returns 2.
      // NOTE(review): the meaning of this count is defined by the overridden
      // base-class hook -- see TargetLowering for the contract.
573   unsigned combineRepeatedFPDivisors() const override { return 2; }
574 
575   bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
576   bool allowUnsafeFPMath(MachineFunction &MF) const;
577 
      // Answers true unconditionally; neither the function nor the type is
      // inspected.
578   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
579                                   EVT) const override {
580     return true;
581   }
582 
      // Answers true unconditionally, for every VT.
583   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
584 
585   // The default is to transform llvm.ctlz(x, false) (where false indicates that
586   // x == 0 is not undefined behavior) into a branch that checks whether x is 0
587   // and avoids calling ctlz in that case.  We have a dedicated ctlz
588   // instruction, so we say that ctlz is cheap to speculate.
589   bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
590 
      // Always AtomicExpansionKind::None, regardless of the load instruction.
591   AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
592     return AtomicExpansionKind::None;
593   }
594 
      // Always AtomicExpansionKind::None, regardless of the store instruction.
595   AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
596     return AtomicExpansionKind::None;
597   }
598 
599   AtomicExpansionKind
600   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
601 
602   bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
603     // There's rarely any point of packing something into a vector type if we
604     // already have the source data.
605     return true;
606   }
607 
608 private:
609   const NVPTXSubtarget &STI; // cache the subtarget here
610   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
611 
      // Per-operation lowering helpers. Each takes the node being lowered and
      // the DAG and returns an SDValue.
      // NOTE(review): presumably dispatched to from LowerOperation -- confirm in
      // the implementation file.
612   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
613   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
614   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
615   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
616   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
617 
618   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
619   SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
620   SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
621 
622   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
623   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
624 
625   SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
626   SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
627 
628   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
629   SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
630 
631   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
632   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
633   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
634 
635   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
636   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
637 
638   SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
639 
640   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
641   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
642 
643   SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const;
644   unsigned getNumRegisters(LLVMContext &Context, EVT VT,
645                            std::optional<MVT> RegisterVT) const override;
646   bool
647   splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
648                               SDValue *Parts, unsigned NumParts, MVT PartVT,
649                               std::optional<CallingConv::ID> CC) const override;
650 
651   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
652                           SelectionDAG &DAG) const override;
653   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
654 
      // Call-site counterpart of getFunctionArgumentAlignment: takes the call
      // instruction instead of the callee Function.
655   Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx,
656                              const DataLayout &DL) const;
657 };
658 
659 } // namespace llvm
660 
661 #endif
662