xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.h (revision b9128a37faafede823eb456aa65a11ac69997284)
1 //===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that NVPTX uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
16 
17 #include "NVPTX.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 
21 namespace llvm {
22 namespace NVPTXISD {
/// NVPTX-specific node type opcodes. The underlying type is unsigned, and
/// every enumerator without an explicit initializer takes the value of the
/// previous one plus one. Inserting, removing, or reordering entries therefore
/// renumbers every enumerator that follows in the same run.
///
/// There are two runs: the first is anchored at ISD::BUILTIN_OP_END and the
/// second at ISD::FIRST_TARGET_MEMORY_OPCODE (starting with LoadV2).
23 enum NodeType : unsigned {
24   // Start the numbering from where ISD NodeType finishes.
25   FIRST_NUMBER = ISD::BUILTIN_OP_END,
26   Wrapper,
27   CALL,
28   RET_GLUE,
29   LOAD_PARAM,
30   DeclareParam,
31   DeclareScalarParam,
32   DeclareRetParam,
33   DeclareRet,
34   DeclareScalarRet,
35   PrintCall,
36   PrintConvergentCall,
37   PrintCallUni,
38   PrintConvergentCallUni,
39   CallArgBegin,
40   CallArg,
41   LastCallArg,
42   CallArgEnd,
43   CallVoid,
44   CallVal,
45   CallSymbol,
46   Prototype,
47   MoveParam,
48   PseudoUseParam,
49   RETURN,
50   CallSeqBegin,
51   CallSeqEnd,
52   CallPrototype,
53   ProxyReg,
54   FUN_SHFL_CLAMP,
55   FUN_SHFR_CLAMP,
56   MUL_WIDE_SIGNED,
57   MUL_WIDE_UNSIGNED,
58   IMAD,
59   SETP_F16X2,
60   SETP_BF16X2,
61   BFE,
62   BFI,
63   PRMT,
64   Dummy,
65 
  // Numbering restarts here at ISD::FIRST_TARGET_MEMORY_OPCODE; every
  // enumerator from LoadV2 through the end of the enum continues from that
  // value.
  // NOTE(review): presumably this range marks nodes that access memory --
  // confirm against the ISD opcode definitions.
66   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
67   LoadV4,
68   LDGV2, // LDG.v2
69   LDGV4, // LDG.v4
70   LDUV2, // LDU.v2
71   LDUV4, // LDU.v4
72   StoreV2,
73   StoreV4,
74   LoadParam,
75   LoadParamV2,
76   LoadParamV4,
77   StoreParam,
78   StoreParamV2,
79   StoreParamV4,
80   StoreParamS32, // to sext and store a <32bit value, not used currently
81   StoreParamU32, // to zext and store a <32bit value, not used currently
82   StoreRetval,
83   StoreRetvalV2,
84   StoreRetvalV4,
85 
86   // Texture intrinsics
  // The Tex*/Tld4* enumerators come first, followed by the parallel
  // TexUnified*/Tld4Unified* set. The unified set additionally contains
  // TexUnifiedCube*Grad entries that have no counterpart in the first set.
87   Tex1DFloatS32,
88   Tex1DFloatFloat,
89   Tex1DFloatFloatLevel,
90   Tex1DFloatFloatGrad,
91   Tex1DS32S32,
92   Tex1DS32Float,
93   Tex1DS32FloatLevel,
94   Tex1DS32FloatGrad,
95   Tex1DU32S32,
96   Tex1DU32Float,
97   Tex1DU32FloatLevel,
98   Tex1DU32FloatGrad,
99   Tex1DArrayFloatS32,
100   Tex1DArrayFloatFloat,
101   Tex1DArrayFloatFloatLevel,
102   Tex1DArrayFloatFloatGrad,
103   Tex1DArrayS32S32,
104   Tex1DArrayS32Float,
105   Tex1DArrayS32FloatLevel,
106   Tex1DArrayS32FloatGrad,
107   Tex1DArrayU32S32,
108   Tex1DArrayU32Float,
109   Tex1DArrayU32FloatLevel,
110   Tex1DArrayU32FloatGrad,
111   Tex2DFloatS32,
112   Tex2DFloatFloat,
113   Tex2DFloatFloatLevel,
114   Tex2DFloatFloatGrad,
115   Tex2DS32S32,
116   Tex2DS32Float,
117   Tex2DS32FloatLevel,
118   Tex2DS32FloatGrad,
119   Tex2DU32S32,
120   Tex2DU32Float,
121   Tex2DU32FloatLevel,
122   Tex2DU32FloatGrad,
123   Tex2DArrayFloatS32,
124   Tex2DArrayFloatFloat,
125   Tex2DArrayFloatFloatLevel,
126   Tex2DArrayFloatFloatGrad,
127   Tex2DArrayS32S32,
128   Tex2DArrayS32Float,
129   Tex2DArrayS32FloatLevel,
130   Tex2DArrayS32FloatGrad,
131   Tex2DArrayU32S32,
132   Tex2DArrayU32Float,
133   Tex2DArrayU32FloatLevel,
134   Tex2DArrayU32FloatGrad,
135   Tex3DFloatS32,
136   Tex3DFloatFloat,
137   Tex3DFloatFloatLevel,
138   Tex3DFloatFloatGrad,
139   Tex3DS32S32,
140   Tex3DS32Float,
141   Tex3DS32FloatLevel,
142   Tex3DS32FloatGrad,
143   Tex3DU32S32,
144   Tex3DU32Float,
145   Tex3DU32FloatLevel,
146   Tex3DU32FloatGrad,
147   TexCubeFloatFloat,
148   TexCubeFloatFloatLevel,
149   TexCubeS32Float,
150   TexCubeS32FloatLevel,
151   TexCubeU32Float,
152   TexCubeU32FloatLevel,
153   TexCubeArrayFloatFloat,
154   TexCubeArrayFloatFloatLevel,
155   TexCubeArrayS32Float,
156   TexCubeArrayS32FloatLevel,
157   TexCubeArrayU32Float,
158   TexCubeArrayU32FloatLevel,
159   Tld4R2DFloatFloat,
160   Tld4G2DFloatFloat,
161   Tld4B2DFloatFloat,
162   Tld4A2DFloatFloat,
163   Tld4R2DS64Float,
164   Tld4G2DS64Float,
165   Tld4B2DS64Float,
166   Tld4A2DS64Float,
167   Tld4R2DU64Float,
168   Tld4G2DU64Float,
169   Tld4B2DU64Float,
170   Tld4A2DU64Float,
171   TexUnified1DFloatS32,
172   TexUnified1DFloatFloat,
173   TexUnified1DFloatFloatLevel,
174   TexUnified1DFloatFloatGrad,
175   TexUnified1DS32S32,
176   TexUnified1DS32Float,
177   TexUnified1DS32FloatLevel,
178   TexUnified1DS32FloatGrad,
179   TexUnified1DU32S32,
180   TexUnified1DU32Float,
181   TexUnified1DU32FloatLevel,
182   TexUnified1DU32FloatGrad,
183   TexUnified1DArrayFloatS32,
184   TexUnified1DArrayFloatFloat,
185   TexUnified1DArrayFloatFloatLevel,
186   TexUnified1DArrayFloatFloatGrad,
187   TexUnified1DArrayS32S32,
188   TexUnified1DArrayS32Float,
189   TexUnified1DArrayS32FloatLevel,
190   TexUnified1DArrayS32FloatGrad,
191   TexUnified1DArrayU32S32,
192   TexUnified1DArrayU32Float,
193   TexUnified1DArrayU32FloatLevel,
194   TexUnified1DArrayU32FloatGrad,
195   TexUnified2DFloatS32,
196   TexUnified2DFloatFloat,
197   TexUnified2DFloatFloatLevel,
198   TexUnified2DFloatFloatGrad,
199   TexUnified2DS32S32,
200   TexUnified2DS32Float,
201   TexUnified2DS32FloatLevel,
202   TexUnified2DS32FloatGrad,
203   TexUnified2DU32S32,
204   TexUnified2DU32Float,
205   TexUnified2DU32FloatLevel,
206   TexUnified2DU32FloatGrad,
207   TexUnified2DArrayFloatS32,
208   TexUnified2DArrayFloatFloat,
209   TexUnified2DArrayFloatFloatLevel,
210   TexUnified2DArrayFloatFloatGrad,
211   TexUnified2DArrayS32S32,
212   TexUnified2DArrayS32Float,
213   TexUnified2DArrayS32FloatLevel,
214   TexUnified2DArrayS32FloatGrad,
215   TexUnified2DArrayU32S32,
216   TexUnified2DArrayU32Float,
217   TexUnified2DArrayU32FloatLevel,
218   TexUnified2DArrayU32FloatGrad,
219   TexUnified3DFloatS32,
220   TexUnified3DFloatFloat,
221   TexUnified3DFloatFloatLevel,
222   TexUnified3DFloatFloatGrad,
223   TexUnified3DS32S32,
224   TexUnified3DS32Float,
225   TexUnified3DS32FloatLevel,
226   TexUnified3DS32FloatGrad,
227   TexUnified3DU32S32,
228   TexUnified3DU32Float,
229   TexUnified3DU32FloatLevel,
230   TexUnified3DU32FloatGrad,
231   TexUnifiedCubeFloatFloat,
232   TexUnifiedCubeFloatFloatLevel,
233   TexUnifiedCubeS32Float,
234   TexUnifiedCubeS32FloatLevel,
235   TexUnifiedCubeU32Float,
236   TexUnifiedCubeU32FloatLevel,
237   TexUnifiedCubeArrayFloatFloat,
238   TexUnifiedCubeArrayFloatFloatLevel,
239   TexUnifiedCubeArrayS32Float,
240   TexUnifiedCubeArrayS32FloatLevel,
241   TexUnifiedCubeArrayU32Float,
242   TexUnifiedCubeArrayU32FloatLevel,
243   TexUnifiedCubeFloatFloatGrad,
244   TexUnifiedCubeS32FloatGrad,
245   TexUnifiedCubeU32FloatGrad,
246   TexUnifiedCubeArrayFloatFloatGrad,
247   TexUnifiedCubeArrayS32FloatGrad,
248   TexUnifiedCubeArrayU32FloatGrad,
249   Tld4UnifiedR2DFloatFloat,
250   Tld4UnifiedG2DFloatFloat,
251   Tld4UnifiedB2DFloatFloat,
252   Tld4UnifiedA2DFloatFloat,
253   Tld4UnifiedR2DS64Float,
254   Tld4UnifiedG2DS64Float,
255   Tld4UnifiedB2DS64Float,
256   Tld4UnifiedA2DS64Float,
257   Tld4UnifiedR2DU64Float,
258   Tld4UnifiedG2DU64Float,
259   Tld4UnifiedB2DU64Float,
260   Tld4UnifiedA2DU64Float,
261 
262   // Surface intrinsics
  // The Suld* enumerators are laid out in three sets distinguished by name
  // suffix: Clamp, then Trap, then Zero. Each set has five groups (1D,
  // 1DArray, 2D, 2DArray, 3D) of eleven entries: I8..I64, V2I8..V2I64 and
  // V4I8..V4I32. No group has a V4I64 entry.
263   Suld1DI8Clamp,
264   Suld1DI16Clamp,
265   Suld1DI32Clamp,
266   Suld1DI64Clamp,
267   Suld1DV2I8Clamp,
268   Suld1DV2I16Clamp,
269   Suld1DV2I32Clamp,
270   Suld1DV2I64Clamp,
271   Suld1DV4I8Clamp,
272   Suld1DV4I16Clamp,
273   Suld1DV4I32Clamp,
274 
275   Suld1DArrayI8Clamp,
276   Suld1DArrayI16Clamp,
277   Suld1DArrayI32Clamp,
278   Suld1DArrayI64Clamp,
279   Suld1DArrayV2I8Clamp,
280   Suld1DArrayV2I16Clamp,
281   Suld1DArrayV2I32Clamp,
282   Suld1DArrayV2I64Clamp,
283   Suld1DArrayV4I8Clamp,
284   Suld1DArrayV4I16Clamp,
285   Suld1DArrayV4I32Clamp,
286 
287   Suld2DI8Clamp,
288   Suld2DI16Clamp,
289   Suld2DI32Clamp,
290   Suld2DI64Clamp,
291   Suld2DV2I8Clamp,
292   Suld2DV2I16Clamp,
293   Suld2DV2I32Clamp,
294   Suld2DV2I64Clamp,
295   Suld2DV4I8Clamp,
296   Suld2DV4I16Clamp,
297   Suld2DV4I32Clamp,
298 
299   Suld2DArrayI8Clamp,
300   Suld2DArrayI16Clamp,
301   Suld2DArrayI32Clamp,
302   Suld2DArrayI64Clamp,
303   Suld2DArrayV2I8Clamp,
304   Suld2DArrayV2I16Clamp,
305   Suld2DArrayV2I32Clamp,
306   Suld2DArrayV2I64Clamp,
307   Suld2DArrayV4I8Clamp,
308   Suld2DArrayV4I16Clamp,
309   Suld2DArrayV4I32Clamp,
310 
311   Suld3DI8Clamp,
312   Suld3DI16Clamp,
313   Suld3DI32Clamp,
314   Suld3DI64Clamp,
315   Suld3DV2I8Clamp,
316   Suld3DV2I16Clamp,
317   Suld3DV2I32Clamp,
318   Suld3DV2I64Clamp,
319   Suld3DV4I8Clamp,
320   Suld3DV4I16Clamp,
321   Suld3DV4I32Clamp,
322 
323   Suld1DI8Trap,
324   Suld1DI16Trap,
325   Suld1DI32Trap,
326   Suld1DI64Trap,
327   Suld1DV2I8Trap,
328   Suld1DV2I16Trap,
329   Suld1DV2I32Trap,
330   Suld1DV2I64Trap,
331   Suld1DV4I8Trap,
332   Suld1DV4I16Trap,
333   Suld1DV4I32Trap,
334 
335   Suld1DArrayI8Trap,
336   Suld1DArrayI16Trap,
337   Suld1DArrayI32Trap,
338   Suld1DArrayI64Trap,
339   Suld1DArrayV2I8Trap,
340   Suld1DArrayV2I16Trap,
341   Suld1DArrayV2I32Trap,
342   Suld1DArrayV2I64Trap,
343   Suld1DArrayV4I8Trap,
344   Suld1DArrayV4I16Trap,
345   Suld1DArrayV4I32Trap,
346 
347   Suld2DI8Trap,
348   Suld2DI16Trap,
349   Suld2DI32Trap,
350   Suld2DI64Trap,
351   Suld2DV2I8Trap,
352   Suld2DV2I16Trap,
353   Suld2DV2I32Trap,
354   Suld2DV2I64Trap,
355   Suld2DV4I8Trap,
356   Suld2DV4I16Trap,
357   Suld2DV4I32Trap,
358 
359   Suld2DArrayI8Trap,
360   Suld2DArrayI16Trap,
361   Suld2DArrayI32Trap,
362   Suld2DArrayI64Trap,
363   Suld2DArrayV2I8Trap,
364   Suld2DArrayV2I16Trap,
365   Suld2DArrayV2I32Trap,
366   Suld2DArrayV2I64Trap,
367   Suld2DArrayV4I8Trap,
368   Suld2DArrayV4I16Trap,
369   Suld2DArrayV4I32Trap,
370 
371   Suld3DI8Trap,
372   Suld3DI16Trap,
373   Suld3DI32Trap,
374   Suld3DI64Trap,
375   Suld3DV2I8Trap,
376   Suld3DV2I16Trap,
377   Suld3DV2I32Trap,
378   Suld3DV2I64Trap,
379   Suld3DV4I8Trap,
380   Suld3DV4I16Trap,
381   Suld3DV4I32Trap,
382 
383   Suld1DI8Zero,
384   Suld1DI16Zero,
385   Suld1DI32Zero,
386   Suld1DI64Zero,
387   Suld1DV2I8Zero,
388   Suld1DV2I16Zero,
389   Suld1DV2I32Zero,
390   Suld1DV2I64Zero,
391   Suld1DV4I8Zero,
392   Suld1DV4I16Zero,
393   Suld1DV4I32Zero,
394 
395   Suld1DArrayI8Zero,
396   Suld1DArrayI16Zero,
397   Suld1DArrayI32Zero,
398   Suld1DArrayI64Zero,
399   Suld1DArrayV2I8Zero,
400   Suld1DArrayV2I16Zero,
401   Suld1DArrayV2I32Zero,
402   Suld1DArrayV2I64Zero,
403   Suld1DArrayV4I8Zero,
404   Suld1DArrayV4I16Zero,
405   Suld1DArrayV4I32Zero,
406 
407   Suld2DI8Zero,
408   Suld2DI16Zero,
409   Suld2DI32Zero,
410   Suld2DI64Zero,
411   Suld2DV2I8Zero,
412   Suld2DV2I16Zero,
413   Suld2DV2I32Zero,
414   Suld2DV2I64Zero,
415   Suld2DV4I8Zero,
416   Suld2DV4I16Zero,
417   Suld2DV4I32Zero,
418 
419   Suld2DArrayI8Zero,
420   Suld2DArrayI16Zero,
421   Suld2DArrayI32Zero,
422   Suld2DArrayI64Zero,
423   Suld2DArrayV2I8Zero,
424   Suld2DArrayV2I16Zero,
425   Suld2DArrayV2I32Zero,
426   Suld2DArrayV2I64Zero,
427   Suld2DArrayV4I8Zero,
428   Suld2DArrayV4I16Zero,
429   Suld2DArrayV4I32Zero,
430 
431   Suld3DI8Zero,
432   Suld3DI16Zero,
433   Suld3DI32Zero,
434   Suld3DI64Zero,
435   Suld3DV2I8Zero,
436   Suld3DV2I16Zero,
437   Suld3DV2I32Zero,
438   Suld3DV2I64Zero,
439   Suld3DV4I8Zero,
440   Suld3DV4I16Zero,
441   Suld3DV4I32Zero
442 };
443 } // namespace NVPTXISD
444 
445 class NVPTXSubtarget;
446 
447 //===--------------------------------------------------------------------===//
448 // TargetLowering Implementation
449 //===--------------------------------------------------------------------===//
/// NVPTX's subclass of TargetLowering. It declares the hooks NVPTX overrides
/// plus NVPTX-only helpers for lowering into a selection DAG. Only the small
/// inline hooks are defined in this header; every other member is declared
/// here and defined elsewhere.
450 class NVPTXTargetLowering : public TargetLowering {
451 public:
452   explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
453                                const NVPTXSubtarget &STI);
454   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
455 
456   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
457 
458   const char *getTargetNodeName(unsigned Opcode) const override;
459 
460   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
461                           MachineFunction &MF,
462                           unsigned Intrinsic) const override;
463 
464   /// getFunctionParamOptimizedAlign - since function arguments are passed via
465   /// .param space, we may want to increase their alignment in a way that
466   /// ensures that we can effectively vectorize their loads & stores. We can
467   /// increase alignment only if the function has internal or private
468   /// linkage, as for other linkage types callers may already rely on default
469   /// alignment. To allow using 128-bit vectorized loads/stores, this function
470   /// ensures that alignment is 16 or greater.
471   Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy,
472                                        const DataLayout &DL) const;
473 
474   /// Helper for computing alignment of a device function byval parameter.
475   Align getFunctionByValParamAlign(const Function *F, Type *ArgTy,
476                                    Align InitialAlign,
477                                    const DataLayout &DL) const;
478 
479   // Helper for getting a function parameter name. Name is composed from
480   // its index and the function name. Negative index corresponds to special
481   // parameter (unsized array) used for passing variable arguments.
482   std::string getParamName(const Function *F, int Idx) const;
483 
484   /// isLegalAddressingMode - Return true if the addressing mode represented
485   /// by AM is legal for this target, for a load/store of the specified type.
486   /// Used to guide target specific optimizations, like loop strength
487   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
488   /// address mode (CodeGenPrepare.cpp).
489   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
490                              unsigned AS,
491                              Instruction *I = nullptr) const override;
492 
  /// Reports a truncation as free only when both types are integer types, the
  /// source is exactly 64 bits wide and the destination is exactly 32 bits
  /// wide. Every other combination returns false.
493   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
494     // Truncating 64-bit to 32-bit is free in SASS.
495     if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
496       return false;
497     return SrcTy->getPrimitiveSizeInBits() == 64 &&
498            DstTy->getPrimitiveSizeInBits() == 32;
499   }
500 
  /// The comparison result type is i1. For a vector VT it is a vector of i1
  /// with the same number of elements as VT. DL is not consulted.
501   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
502                          EVT VT) const override {
503     if (VT.isVector())
504       return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
505     return MVT::i1;
506   }
507 
508   ConstraintType getConstraintType(StringRef Constraint) const override;
509   std::pair<unsigned, const TargetRegisterClass *>
510   getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
511                                StringRef Constraint, MVT VT) const override;
512 
513   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
514                                bool isVarArg,
515                                const SmallVectorImpl<ISD::InputArg> &Ins,
516                                const SDLoc &dl, SelectionDAG &DAG,
517                                SmallVectorImpl<SDValue> &InVals) const override;
518 
519   SDValue LowerCall(CallLoweringInfo &CLI,
520                     SmallVectorImpl<SDValue> &InVals) const override;
521 
522   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
523 
524   std::string
525   getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
526                const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
527                std::optional<std::pair<unsigned, const APInt &>> VAInfo,
528                const CallBase &CB, unsigned UniqueCallSite) const;
529 
530   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
531                       const SmallVectorImpl<ISD::OutputArg> &Outs,
532                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
533                       SelectionDAG &DAG) const override;
534 
535   void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
536                                     std::vector<SDValue> &Ops,
537                                     SelectionDAG &DAG) const override;
538 
  // Public raw pointer to the target machine; it has no in-class initializer.
  // NOTE(review): presumably assigned by the constructor from its TM
  // argument -- confirm in the implementation file.
539   const NVPTXTargetMachine *nvTM;
540 
541   // PTX always uses 32-bit shift amounts
542   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
543     return MVT::i32;
544   }
545 
546   TargetLoweringBase::LegalizeTypeAction
547   getPreferredVectorAction(MVT VT) const override;
548 
549   // Get the degree of precision we want from 32-bit floating point division
550   // operations.
551   //
552   //  0 - Use ptx div.approx
553   //  1 - Use ptx div.full (approximate, but less so than div.approx)
554   //  2 - Use IEEE-compliant div instructions, if available.
555   int getDivF32Level() const;
556 
557   // Get whether we should use a precise or approximate 32-bit floating point
558   // sqrt instruction.
559   bool usePrecSqrtF32() const;
560 
561   // Get whether we should use instructions that flush floating-point denormals
562   // to sign-preserving zero.
563   bool useF32FTZ(const MachineFunction &MF) const;
564 
565   SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
566                           int &ExtraSteps, bool &UseOneConst,
567                           bool Reciprocal) const override;
568 
  // Always returns 2.
  // NOTE(review): presumably this is the minimum number of uses of the same
  // divisor before repeated FP divisions are combined -- confirm against the
  // TargetLowering documentation for this hook.
569   unsigned combineRepeatedFPDivisors() const override { return 2; }
570 
571   bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
572   bool allowUnsafeFPMath(MachineFunction &MF) const;
573 
  // Unconditionally true; neither the function nor the value type is
  // inspected.
574   bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
575                                   EVT) const override {
576     return true;
577   }
578 
  // Unconditionally true for every value type.
579   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
580 
581   // The default is to transform llvm.ctlz(x, false) (where false indicates that
582   // x == 0 is not undefined behavior) into a branch that checks whether x is 0
583   // and avoids calling ctlz in that case.  We have a dedicated ctlz
584   // instruction, so we say that ctlz is cheap to speculate.
585   bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
586 
  // Both atomic cast hooks below return AtomicExpansionKind::None for every
  // load and store instruction.
587   AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
588     return AtomicExpansionKind::None;
589   }
590 
591   AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
592     return AtomicExpansionKind::None;
593   }
594 
595   AtomicExpansionKind
596   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
597 
598   bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
599     // There's rarely any point of packing something into a vector type if we
600     // already have the source data.
601     return true;
602   }
603 
604 private:
605   const NVPTXSubtarget &STI; // cache the subtarget here
606   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
607 
  // Per-operation lowering helpers. All are const members that take the
  // operation and the DAG and return an SDValue.
  // NOTE(review): presumably dispatched from LowerOperation -- confirm in the
  // implementation file.
608   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
609   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
610   SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
611   SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
612   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
613 
614   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
615   SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
616   SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
617 
618   SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
619   SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
620 
621   SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
622   SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
623 
624   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
625   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
626   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
627 
628   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
629   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
630 
631   SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
632 
633   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
634   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
635 
636   void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
637                           SelectionDAG &DAG) const override;
638   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
639 
640   Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx,
641                              const DataLayout &DL) const;
642 };
643 
644 } // namespace llvm
645 
646 #endif
647