xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Pattern leaf for an fpimm whose value, read back as a float, compares
// equal to 0.0f (the C++ `==` used here also accepts -0.0f).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Pattern leaf for an fpimm whose value, read back as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Pattern leaf for an fpimm whose value, read back as a double, compares
// equal to 0.0 (the C++ `==` used here also accepts -0.0).
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Pattern leaf for an fpimm whose value, read back as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// Reusable C++ predicate bodies, one per address space. Each fragment
// returns the result of ChkMemSDNodeAddressSpace for node N.
def AS_match {
  // Generic address space.
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  // Shared address space.
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  // Global address space.
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides `ptx.version`, a dag that is replaced with the PTX version
// reported by the current subtarget.
class PTX {
  // Ignores the incoming immediate and produces an i32 immediate holding
  // Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    SDLoc DL(N);
    return getI32Imm(Subtarget->getPTXVersion(), DL);
  }]>;

  // The (i32 0) placeholder is what gets XForm'ed into the PTX version.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Generates a list of n sequentially numbered register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is built recursively: the names for n-1 followed by
// prefix # (n-1). The recursion stops as soon as n is not positive, so
// RegSeq<0, ...> and any negative n both yield an empty list (testing n
// for plain non-zero-ness would never terminate for a negative n).
class RegSeq<int n, string prefix> {
  list<string> ret = !if(!gt(n, 0),
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}
58
// Values that the `threadmask_imm` loop variable iterates over when the
// shfl instructions are instantiated: both 0 and 1 when `sync` is set,
// only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// int_nvvm_barrier0: no operands, prints `bar.sync 0`.
def INT_BARRIER0 :
  NVPTXInst<(outs), (ins),
            "bar.sync \t0;",
            [(int_nvvm_barrier0)]>;

// int_nvvm_barrier_n: one register operand, printed after `bar.sync`.
def INT_BARRIERN :
  NVPTXInst<(outs), (ins Int32Regs:$src1),
            "bar.sync \t$src1;",
            [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// int_nvvm_barrier: two register operands, printed after `bar.sync`.
def INT_BARRIER :
  NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
            "bar.sync \t$src1, $src2;",
            [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// The three barrier0 reductions below take a 32-bit register input. Each
// emits a braced PTX sequence that first turns $pred into a predicate with
// `setp.ne.u32 ..., 0`, then issues the `bar.red` reduction on barrier 0.

// popc variant: bar.red.popc.u32 writes $dst directly.
def INT_BARRIER0_POPC :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
            # "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// and variant: the predicate result %p2 is converted to 1/0 with selp.u32.
def INT_BARRIER0_AND :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # ".reg .pred \t%p2; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
            # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
            # "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// or variant: the predicate result %p2 is converted to 1/0 with selp.u32.
def INT_BARRIER0_OR :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t"
            # ".reg .pred \t%p1; \n\t"
            # ".reg .pred \t%p2; \n\t"
            # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
            # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
            # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
            # "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// int_nvvm_bar_sync with an immediate operand, printed after `bar.sync`.
def INT_BAR_SYNC :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.sync \t$i;",
            [(int_nvvm_bar_sync imm:$i)]>;
104
// int_nvvm_bar_warp_sync, immediate (_I) and register (_R) operand forms.
// Both are gated on hasPTX<60> and hasSM<30>.
def INT_BAR_WARP_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
111
// int_nvvm_barrier_sync, immediate (_I) and register (_R) operand forms.
// Both are gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
118
// int_nvvm_barrier_sync_cnt in all four register/immediate combinations.
// The suffix letters give the kind of $id and $cnt in that order
// (R = Int32Regs, I = i32imm). All are gated on hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_CNT_RR :
  NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI :
  NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR :
  NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II :
  NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
135
// Operand-less `barrier.cluster.<variant>;` instruction selected for the
// intrinsic Intr. Preds defaults to [hasPTX<78>, hasSM<90>] and can be
// overridden per definition.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>
  : NVPTXInst<(outs), (ins),
              !strconcat("barrier.cluster.", variant, ";"),
              [(Intr)]>,
    Requires<Preds>;
140
// Cluster barrier intrinsics. The `relaxed` forms override the default
// predicates with [hasPTX<80>, hasSM<90>].
def barrier_cluster_arrive :
  INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed :
  INT_BARRIER_CLUSTER<"arrive.relaxed",
                      int_nvvm_barrier_cluster_arrive_relaxed,
                      [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait :
  INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// 'aligned' versions of the cluster barrier intrinsics
def barrier_cluster_arrive_aligned :
  INT_BARRIER_CLUSTER<"arrive.aligned",
                      int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned :
  INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
                      int_nvvm_barrier_cluster_arrive_relaxed_aligned,
                      [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned :
  INT_BARRIER_CLUSTER<"wait.aligned",
                      int_nvvm_barrier_cluster_wait_aligned>;
157
// One shfl instruction variant. The base NVPTXInst is created with
// placeholder operands/asm/pattern, all of which are overridden below.
//
//   sync           - use the `shfl.sync` form (adds a threadmask operand).
//   mode           - the shfl mode string, e.g. "up".
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - also produce an Int1Regs:$pred result.
//   offset_imm, mask_imm, threadmask_imm
//                  - use an i32imm operand instead of Int32Regs for the
//                    corresponding input.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of the shuffled value; only "i32" and "f32" are handled.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Name of the intrinsic record: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs, in order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]),
               ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // Note the printed operand order differs from InOperandList: the
  // threadmask, when present, comes last.
  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t$dst",
                             !if(return_pred, "|$pred", ""),
                             ", $src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // The selection pattern is derived from the operand lists:
  //   (set <outs...>, (Intr <ins...>))
  // with every i32imm operand rewritten to the `imm` pattern node.
  let Pattern = [!con(
      !foreach(elt, OutOperandList,
               !subst(outs, set,
               !subst(i32imm, imm, elt))),
      (set !foreach(elt, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, elt)))))];
}
197
// Instantiate SHFL_INSTR for every combination of the parameters below.
// threadmask_imm takes the values in THREADMASK_INFO<sync>.ret. The sync
// forms are gated on [hasSM<30>, hasPTX<60>], the others on
// [hasSM<30>, hasSHFL].
foreach sync = [false, true] in
  foreach mode = ["up", "down", "bfly", "idx"] in
    foreach regclass = ["i32", "f32"] in
      foreach return_pred = [false, true] in
        foreach offset_imm = [false, true] in
          foreach mask_imm = [false, true] in
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM<30>, hasPTX<60>],
                                 [hasSM<30>, hasSHFL])>;
215
// vote.{all,any,uni,ballot}
//
// One anonymous instruction per defm: takes an Int1Regs predicate and
// writes the result to `regclass`. `mode` is the text printed after
// "vote.". Gated on hasPTX<60> and hasSM<30>.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
228
// vote.sync.{all,any,uni,ballot}
//
// Two instructions per defm, differing only in the kind of $mask:
//   i - immediate mask (i32imm)
//   r - register mask  (Int32Regs)
// The intrinsic takes (mask, pred); the assembly prints $pred before $mask.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Assembly text shared by both forms.
  defvar AsmText = !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;");

  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    AsmText,
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    AsmText,
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL :
  VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY :
  VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI :
  VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT :
  VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
245
// match.any.sync.<ptxtype>
//
// Four instructions per defm, one for each register/immediate combination
// of the operands. The result is always an Int32Regs value. Judging by the
// operand lists, the first suffix letter gives the kind of $value and the
// second the kind of $mask (i = immediate, r = register):
//   ii - imm value, imm mask        ir - imm value, reg mask
//   ri - reg value, imm mask        rr - reg value, reg mask
// The intrinsic takes (mask, value); the assembly prints $value first.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  // Assembly text shared by all four forms.
  defvar AsmText =
      !strconcat("match.any.sync.", ptxtype, " \t$dest, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32 :
  MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64 :
  MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
270
// match.all.sync.<ptxtype> with a predicate result.
//
// Same four operand combinations as a plain register/immediate matrix
// (first suffix letter: kind of $value, second: kind of $mask), but every
// form produces two results: Int32Regs:$dest and Int1Regs:$pred, printed
// as "$dest|$pred".
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  // Assembly text shared by all four forms.
  defvar AsmText =
      !strconcat("match.all.sync.", ptxtype, " \t$dest|$pred, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ALLP_SYNC_32 :
  MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64 :
  MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
298
// redux.sync.<BinOp>.<PTXType>
//
// One anonymous instruction per defm with 32-bit register source, mask and
// result. Gated on hasPTX<70> and hasSM<80>.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
314
315} // isConvergent = true
316
317//-----------------------------------
318// Explicit Memory Fence Functions
319//-----------------------------------
// Operand-less fence instruction: StrOp is the complete assembly text and
// IntOP the intrinsic it is selected for.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// Unlike the membar.* forms above, this one is gated on hasPTX<78> and
// hasSM<90>.
def INT_FENCE_SC_CLUSTER :
  MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
  Requires<[hasPTX<78>, hasSM<90>]>;
331
332//-----------------------------------
333// Async Copy Functions
334//-----------------------------------
335
// cp.async.mbarrier.arrive[NoInc][AddrSpace].b64 [$addr];
//
// NoInc and AddrSpace are spliced verbatim into the mnemonic (pass "" to
// omit either). The _32 / _64 forms take the address in a 32-bit or 64-bit
// register respectively. Gated on hasPTX<70> and hasSM<80>.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText =
      "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmText,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmText,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "",
                           int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                           int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
355
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
//
//   cc      - text spliced in after "cp.async." ("ca" or "cg" below).
//   cpsize  - copy size, printed verbatim as the third operand.
//   Intrin  - intrinsic for the two-operand form.
//   IntrinS - intrinsic for the form with an extra src_size operand.
//
// Generated instructions (all gated on hasPTX<70> and hasSM<80>):
//   _32,  _64   - 32/64-bit address registers, no src_size
//   _32s, _64s  - as above plus src_size in an Int32Regs register
//   _32si,_64si - as above plus src_size as an i32imm immediate
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize,
                                    Intrinsic Intrin, Intrinsic IntrinS> {
  // Common prefix of the assembly text, up to and including the copy size.
  defvar AsmPrefix =
      "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize;
  defvar AsmPlain = AsmPrefix # ";";
  defvar AsmSized = AsmPrefix # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      AsmPlain,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      AsmPlain,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src,
                            Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si : NVPTXInst<(outs),
                        (ins Int32Regs:$dst, Int32Regs:$src,
                             i32imm:$src_size),
                        AsmSized,
                        [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src,
                            Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si : NVPTXInst<(outs),
                        (ins Int64Regs:$dst, Int64Regs:$src,
                             i32imm:$src_size),
                        AsmSized,
                        [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                           int_nvvm_cp_async_ca_shared_global_4,
                           int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                           int_nvvm_cp_async_ca_shared_global_8,
                           int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                           int_nvvm_cp_async_ca_shared_global_16,
                           int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                           int_nvvm_cp_async_cg_shared_global_16,
                           int_nvvm_cp_async_cg_shared_global_16_s>;
399
// cp.async group management. All three are gated on hasPTX<70> and
// hasSM<80>.
def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins),
            "cp.async.commit_group;",
            [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// $n is matched as a target immediate (timm), so there is no register form.
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.wait_group $n;",
            [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins),
            "cp.async.wait_all;",
            [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;
413
// cp.async.bulk variants of the commit/wait group. All three are gated on
// hasPTX<80> and hasSM<90>; the wait forms match $n as a target immediate.
def CP_ASYNC_BULK_COMMIT_GROUP :
  NVPTXInst<(outs), (ins),
            "cp.async.bulk.commit_group;",
            [(int_nvvm_cp_async_bulk_commit_group)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.bulk.wait_group $n;",
            [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP_READ :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.bulk.wait_group.read $n;",
            [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;
429
430//-----------------------------------
431// MBarrier Functions
432//-----------------------------------
433
// mbarrier.init[AddrSpace].b64 [$addr], $count;
//
// No result. $count is a 32-bit register; the _32 / _64 forms take $addr
// in a 32-bit or 64-bit register. Gated on hasPTX<70> and hasSM<80>.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT :
  MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED :
  MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
448
// mbarrier.inval[AddrSpace].b64 [$addr];
//
// No result. The _32 / _64 forms take $addr in a 32-bit or 64-bit
// register. Gated on hasPTX<70> and hasSM<80>.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmText,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmText,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL :
  MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED :
  MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
463
// mbarrier.arrive[AddrSpace].b64 $state, [$addr];
//
// Produces a 64-bit $state. The _32 / _64 forms take $addr in a 32-bit or
// 64-bit register. Gated on hasPTX<70> and hasSM<80>.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE :
  MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
478
// mbarrier.arrive.noComplete[AddrSpace].b64 $state, [$addr], $count;
//
// Produces a 64-bit $state; $count is a 32-bit register. The _32 / _64
// forms take $addr in a 32-bit or 64-bit register. Gated on hasPTX<70>
// and hasSM<80>.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText = "mbarrier.arrive.noComplete" # AddrSpace
                   # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                             int_nvvm_mbarrier_arrive_noComplete_shared>;
498
// mbarrier.arrive_drop[AddrSpace].b64 $state, [$addr];
//
// Produces a 64-bit $state. The _32 / _64 forms take $addr in a 32-bit or
// 64-bit register. Gated on hasPTX<70> and hasSM<80>.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText =
      "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
516
// mbarrier.arrive_drop.noComplete[AddrSpace].b64 $state, [$addr], $count;
//
// Produces a 64-bit $state; $count is a 32-bit register. The _32 / _64
// forms take $addr in a 32-bit or 64-bit register. Gated on hasPTX<70>
// and hasSM<80>.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace,
                                           Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText = "mbarrier.arrive_drop.noComplete" # AddrSpace
                   # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<
      ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
537
// mbarrier.test_wait[AddrSpace].b64 $res, [$addr], $state;
//
// Takes a 64-bit $state and produces a predicate $res (Int1Regs). The
// _32 / _64 forms take $addr in a 32-bit or 64-bit register. Gated on
// hasPTX<70> and hasSM<80>.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  // Assembly text shared by both address widths.
  defvar AsmText =
      "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;";

  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      AsmText,
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      AsmText,
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
553
// mbarrier.pending_count.b64 $res, $state;
//
// Takes a 64-bit $state register and produces a 32-bit $res. Gated on
// hasPTX<70> and hasSM<80>.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
  : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
              "mbarrier.pending_count.b64 $res, $state;",
              [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
562
563//-----------------------------------
564// Math Functions
565//-----------------------------------
566
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// The same applies to fmax and fmin.
572
// f32: select CVT_f32_f32 with CvtSAT for fmin(1.0, fmax(0.0, a)).
// Both intrinsics may have their operands in either order, which gives the
// four source dags below; each becomes one anonymous Pat, in this order.
foreach clamp = [
    (int_nvvm_fmin_f immFloat1,
       (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
    (int_nvvm_fmin_f immFloat1,
       (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
    (int_nvvm_fmin_f
       (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
    (int_nvvm_fmin_f
       (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1)
  ] in
  def : Pat<clamp, (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
585
// f64: select CVT_f64_f64 with CvtSAT for fmin(1.0, fmax(0.0, a)).
// Both intrinsics may have their operands in either order, which gives the
// four source dags below; each becomes one anonymous Pat, in this order.
foreach clamp = [
    (int_nvvm_fmin_d immDouble1,
       (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
    (int_nvvm_fmin_d immDouble1,
       (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
    (int_nvvm_fmin_d
       (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
    (int_nvvm_fmin_d
       (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1)
  ] in
  def : Pat<clamp, (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
598
599
// One-operand math instruction selected for intrinsic IntOP.
// OpcStr is the complete assembly string rather than just a mnemonic,
// because cases such as INT_PTX_RECIP need full control over it.
// Preds is an optional list of predicates gating the instruction.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
    Requires<Preds>;
608
// Two-operand math instruction selected for intrinsic IntOP.
// OpcStr is the complete assembly string rather than just a mnemonic,
// because cases such as INT_PTX_NATIVE_POWR_F need full control over it.
// Preds is an optional list of predicates gating the instruction.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
    Requires<Preds>;
619
// Three-operand math instruction selected for intrinsic IntOP.
// OpcStr is the complete assembly string; Preds is an optional list of
// predicates gating the instruction.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>,
    Requires<Preds>;
629
630//
631// MISC
632//
633
// int_nvvm_prmt: three 32-bit register sources, one 32-bit register result.
def INT_NVVM_PRMT :
  F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
           Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
636
637//
638// Min Max
639//
640
// f32 min intrinsics. The plain and .ftz forms carry no predicates; the
// .NaN forms are gated on [hasPTX<70>, hasSM<80>] and the .xorsign.abs
// forms on [hasPTX<72>, hasSM<86>].
def INT_NVVM_FMIN_F :
  F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F :
  F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F :
  F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
           [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F :
  F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
           [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F :
  F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
           Float32Regs, Float32Regs, Float32Regs,
           int_nvvm_fmin_ftz_nan_xorsign_abs_f,
           [hasPTX<72>, hasSM<86>]>;
667
668def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
669  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
670def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
671  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
672def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
673  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
674  [hasPTX<70>, hasSM<80>]>;
675def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
676  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
677  [hasPTX<70>, hasSM<80>]>;
678def INT_NVVM_FMAX_XORSIGN_ABS_F :
679  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
680    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
681    [hasPTX<72>, hasSM<86>]>;
682def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
683  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
684    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
685    [hasPTX<72>, hasSM<86>]>;
686def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
687  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
688    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
689    [hasPTX<72>, hasSM<86>]>;
690def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
691  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
692    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
693    [hasPTX<72>, hasSM<86>]>;
694
695def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
696  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
697def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
698  Float64Regs, Float64Regs, int_nvvm_fmax_d>;
699
700//
701// Min Max f16, f16x2, bf16, bf16x2
702//
703
// Plain data record describing one min/max variant: the name suffix (V), the
// intrinsic to match (I), the register class used for the result and both
// sources (RC), and the predicates to require.  When Preds is omitted it
// defaults to [hasPTX<70>, hasSM<80>].
704class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
705                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
706  string Variant = V;
707  Intrinsic Intr = I;
708  NVPTXRegClass RegClass = RC;
709  list<Predicate> Predicates = Preds;
710}
711
// Generates the f16, f16x2, bf16 and bf16x2 min/max instructions.
//
// IntName is "min" or "max"; any value other than "min" selects the
// int_nvvm_fmax_* intrinsic in each !if below.  For every tuple P one F_MATH_2
// record is defined, named with P.Variant as its suffix.  Its asm string is
// IntName followed by P.Variant with every "_" replaced by "." (for example
// "_ftz_NaN_f16" yields "min.ftz.NaN.f16").  Scalar variants use Int16Regs and
// the x2 variants use Int32Regs for the result and both sources.  Tuples that
// do not pass predicates get the MIN_MAX_TUPLE default; the xorsign_abs
// variants pass [hasPTX<72>, hasSM<86>] explicitly.
712multiclass MIN_MAX<string IntName> {
713  foreach P = [
714    MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
715      int_nvvm_fmax_f16), Int16Regs>,
716    MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
717      int_nvvm_fmax_ftz_f16), Int16Regs>,
718    MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
719      int_nvvm_fmax_nan_f16), Int16Regs>,
720    MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
721      int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
722    MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
723      int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
724      Int16Regs, [hasPTX<72>, hasSM<86>]>,
725    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
726      int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
727      Int16Regs, [hasPTX<72>, hasSM<86>]>,
728    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
729      int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
730      Int16Regs, [hasPTX<72>, hasSM<86>]>,
731    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
732      int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
733      int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
734    MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
735      int_nvvm_fmax_f16x2), Int32Regs>,
736    MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
737      int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
738    MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
739      int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
740    MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
741      int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
742    MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
743      int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
744      Int32Regs, [hasPTX<72>, hasSM<86>]>,
745    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
746      int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
747      Int32Regs, [hasPTX<72>, hasSM<86>]>,
748    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
749      int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
750      Int32Regs, [hasPTX<72>, hasSM<86>]>,
751    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
752      int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
753      int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
754      Int32Regs, [hasPTX<72>, hasSM<86>]>,
755    MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
756      int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
757    MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
758      int_nvvm_fmax_nan_bf16), Int16Regs>,
759    MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
760      int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
761      Int16Regs, [hasPTX<72>, hasSM<86>]>,
762    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
763      int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
764      Int16Regs, [hasPTX<72>, hasSM<86>]>,
765    MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
766      int_nvvm_fmax_bf16x2), Int32Regs>,
767    MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
768      int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
769    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
770      int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
771      Int32Regs, [hasPTX<72>, hasSM<86>]>,
772    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
773      int_nvvm_fmin_nan_xorsign_abs_bf16x2,
774      int_nvvm_fmax_nan_xorsign_abs_bf16x2),
775      Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
776        def P.Variant : F_MATH_2<!strconcat(
777          IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
778          P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
779  }
780}
781
// Instantiate the f16/f16x2/bf16/bf16x2 min and max instruction families.
// The defm name is the prefix of every generated record, so the "max" family
// is spelled INT_NVVM_FMAX to match the INT_NVVM_FMAX_* scalar records (the
// prefix was previously misspelled INT_NVVM_FMAN).
782defm INT_NVVM_FMIN : MIN_MAX<"min">;
783defm INT_NVVM_FMAX : MIN_MAX<"max">;
784
785//
786// Multiplication
787//
788
// Multiplication intrinsics, all two-source F_MATH_2 records without extra
// predicates:
//   - mul.hi for s16/u16 (Int16Regs), s32/u32 (Int32Regs), s64/u64 (Int64Regs)
//   - mul.{rn,rz,rm,rp}[.ftz].f32 on Float32Regs
//   - mul.{rn,rz,rm,rp}.f64 on Float64Regs
//   - mul24.lo.{s32,u32} on Int32Regs
789def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;", Int16Regs,
790  Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
791def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;", Int16Regs,
792  Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
793def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
794  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
795def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
796  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
797def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
798  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
799def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
800  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
801
802def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
803  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
804def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
805  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
806def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
807  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
808def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
809  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
810def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
811  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
812def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
813  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
814def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
815  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
816def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
817  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
818
819def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
820  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
821def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
822  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
823def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
824  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
825def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
826  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
827
828def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
829  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
830def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
831  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
832
833//
834// Div
835//
836
// Division intrinsics as two-source F_MATH_2 records without extra predicates:
// div.approx[.ftz].f32, div.{rn,rz,rm,rp}[.ftz].f32 on Float32Regs, and
// div.{rn,rz,rm,rp}.f64 on Float64Regs.
837def INT_NVVM_DIV_APPROX_FTZ_F
838  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
839    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
840def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
841  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
842
843def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
844  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
845def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
846  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
847def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
848  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
849def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
850  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
851def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
852  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
853def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
854  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
855def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
856  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
857def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
858  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
859
860def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
861  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
862def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
863  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
864def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
865  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
866def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
867  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
868
869//
870// Sad
871//
872
// sad.{s16,u16,s32,u32,s64,u64}: three-source F_MATH_3 records in which the
// result and all three sources share one integer register class of the
// matching width.
873def INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
874  Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
875def INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
876  Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
877def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
878  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
879def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
880  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
881def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
882  Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
883def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
884  Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
885
886//
887// Floor  Ceil
888//
889
// floor/ceil have no dedicated instruction records here; they are rewritten to
// same-type CVT instructions.  floor uses the CvtRMI mode and ceil the CvtRPI
// mode, with the *_FTZ mode chosen for the ftz f32 intrinsics.
890def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
891          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
892def : Pat<(int_nvvm_floor_f Float32Regs:$a),
893          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
894def : Pat<(int_nvvm_floor_d Float64Regs:$a),
895          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
896
897def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
898          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
899def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
900          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
901def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
902          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
903
904//
905// Abs
906//
907
// abs[.ftz].f32 and abs.f64 as single-source F_MATH_1 records; no extra
// predicates.
908def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
909  Float32Regs, int_nvvm_fabs_ftz_f>;
910def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
911  Float32Regs, int_nvvm_fabs_f>;
912
913def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
914  Float64Regs, int_nvvm_fabs_d>;
915
916//
917// Abs, Neg bf16, bf16x2
918//
919
// abs/neg for bf16 (Int16Regs) and bf16x2 (Int32Regs).  All four records are
// gated on [hasPTX<70>, hasSM<80>].
920def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
921  Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
922def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
923  Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
924def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
925  Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
926def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
927  Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
928
929//
930// Round
931//
932
// round is rewritten to a same-type CVT using the CvtRNI mode (CvtRNI_FTZ for
// the ftz f32 intrinsic).
933def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
934          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
935def : Pat<(int_nvvm_round_f Float32Regs:$a),
936          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
937def : Pat<(int_nvvm_round_d Float64Regs:$a),
938          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
939
940//
941// Trunc
942//
943
// trunc is rewritten to a same-type CVT using the CvtRZI mode (CvtRZI_FTZ for
// the ftz f32 intrinsic).
944def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
945          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
946def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
947          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
948def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
949          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
950
951//
952// Saturate
953//
954
// saturate is rewritten to a same-type CVT using the CvtSAT mode (CvtSAT_FTZ
// for the ftz f32 intrinsic).
955def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
956          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
957def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
958          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
959def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
960          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
961
962//
963// Exp2  Log2
964//
965
// ex2.approx and lg2.approx as single-source F_MATH_1 records.  Only the f16
// and f16x2 ex2 forms carry predicates ([hasPTX<70>, hasSM<75>]); they use
// Int16Regs and Int32Regs respectively.
966def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
967  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
968def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
969  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
970def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
971  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
972def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
973  Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
974def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
975  Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
976
977def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
978  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
979def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
980  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
981def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
982  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
983
984//
985// Sin  Cos
986//
987
// sin.approx[.ftz].f32 and cos.approx[.ftz].f32 on Float32Regs; only f32 forms
// are defined in this section.
988def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
989  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
990def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
991  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
992
993def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
994  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
995def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
996  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
997
998//
999// Fma
1000//
1001
// Plain data record describing one fma variant: the name suffix (V), the
// intrinsic to match (I), the register class shared by the result and all
// sources (RC), and the predicates to require (empty when omitted).
1002class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
1003                list<Predicate> Preds = []> {
1004  string Variant = V;
1005  Intrinsic Intr = I;
1006  NVPTXRegClass RegClass = RC;
1007  list<Predicate> Predicates = Preds;
1008}
1009
// Generates every fma instruction from a table of FMA_TUPLE entries.
//
// For each tuple P one F_MATH_3 record is defined, named with P.Variant as its
// suffix.  Its asm string is "fma" followed by P.Variant with every "_"
// replaced by "." (for example "_rn_ftz_f32" yields "fma.rn.ftz.f32").  The
// f64 and f32 entries carry no predicates.  The f16/f16x2 base, ftz and sat
// forms require [hasPTX<42>, hasSM<53>]; the relu forms and every bf16/bf16x2
// entry require [hasPTX<70>, hasSM<80>].
1010multiclass FMA_INST {
1011  foreach P = [
1012    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
1013    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
1014    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
1015    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
1016
1017    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
1018    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
1019    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
1020    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
1021    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
1022    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
1023    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
1024    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
1025
1026    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
1027    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
1028      [hasPTX<42>, hasSM<53>]>,
1029    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
1030      [hasPTX<42>, hasSM<53>]>,
1031    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
1032      [hasPTX<42>, hasSM<53>]>,
1033    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
1034      [hasPTX<70>, hasSM<80>]>,
1035    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
1036      [hasPTX<70>, hasSM<80>]>,
1037
1038    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
1039    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
1040      [hasPTX<70>, hasSM<80>]>,
1041    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
1042      [hasPTX<70>, hasSM<80>]>,
1043    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
1044      [hasPTX<70>, hasSM<80>]>,
1045    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
1046      [hasPTX<70>, hasSM<80>]>,
1047    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
1048      [hasPTX<70>, hasSM<80>]>,
1049
1050    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
1051      [hasPTX<42>, hasSM<53>]>,
1052    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
1053      [hasPTX<42>, hasSM<53>]>,
1054    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
1055      [hasPTX<42>, hasSM<53>]>,
1056    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
1057      Int32Regs, [hasPTX<42>, hasSM<53>]>,
1058    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
1059      [hasPTX<70>, hasSM<80>]>,
1060    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
1061      Int32Regs, [hasPTX<70>, hasSM<80>]>,
1062    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
1063      [hasPTX<70>, hasSM<80>]>,
1064    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
1065      [hasPTX<70>, hasSM<80>]>
1066  ] in {
1067    def P.Variant :
1068      F_MATH_3<!strconcat("fma",
1069        !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
1070        P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
1071  }
1072}
1073
// Instantiate all fma records; INT_NVVM_FMA is the prefix of each record name.
1074defm INT_NVVM_FMA : FMA_INST;
1075
1076//
1077// Rcp
1078//
1079
// Reciprocal intrinsics as single-source F_MATH_1 records without extra
// predicates: rcp.{rn,rz,rm,rp}[.ftz].f32, rcp.{rn,rz,rm,rp}.f64, and the
// approximate forms rcp.approx.ftz.f32 / rcp.approx.ftz.f64.
1080def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
1081  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
1082def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
1083  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
1084def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
1085  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
1086def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
1087  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
1088def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
1089  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
1090def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
1091  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
1092def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
1093  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
1094def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
1095  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
1096
1097def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
1098  Float64Regs, int_nvvm_rcp_rn_d>;
1099def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
1100  Float64Regs, int_nvvm_rcp_rz_d>;
1101def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
1102  Float64Regs, int_nvvm_rcp_rm_d>;
1103def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
1104  Float64Regs, int_nvvm_rcp_rp_d>;
1105
1106def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
1107  Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
1108def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
1109  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1110
1111//
1112// Sqrt
1113//
1114
// Square-root intrinsics with an explicit mode, as single-source F_MATH_1
// records without extra predicates: sqrt.{rn,rz,rm,rp}[.ftz].f32,
// sqrt.approx[.ftz].f32, and sqrt.{rn,rz,rm,rp}.f64.
1115def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
1116  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
1117def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
1118  Float32Regs, int_nvvm_sqrt_rn_f>;
1119def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
1120  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
1121def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
1122  Float32Regs, int_nvvm_sqrt_rz_f>;
1123def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
1124  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
1125def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
1126  Float32Regs, int_nvvm_sqrt_rm_f>;
1127def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
1128  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
1129def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
1130  Float32Regs, int_nvvm_sqrt_rp_f>;
1131def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
1132  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
1133def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
1134  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
1135
1136def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
1137  Float64Regs, int_nvvm_sqrt_rn_d>;
1138def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
1139  Float64Regs, int_nvvm_sqrt_rz_d>;
1140def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
1141  Float64Regs, int_nvvm_sqrt_rm_d>;
1142def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
1143  Float64Regs, int_nvvm_sqrt_rp_d>;
1144
1145// nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no mode of its own; it is mapped onto one of the
// explicit f32 sqrt instructions according to the active predicates:
//   doF32FTZ and do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN              -> sqrt.rn.f32
//   doF32FTZ                   -> sqrt.approx.ftz.f32
//   (no predicate)             -> sqrt.approx.f32
1146def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1147          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
1148def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1149          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
1150def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1151          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
1152def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
1153          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
1154
1155//
1156// Rsqrt
1157//
1158
// rsqrt.approx[.ftz].f32 and rsqrt.approx.f64 as single-source F_MATH_1
// records; only approximate forms are defined in this section.
1159def INT_NVVM_RSQRT_APPROX_FTZ_F
1160  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
1161    int_nvvm_rsqrt_approx_ftz_f>;
1162def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
1163  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
1164def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
1165  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1166
1167//
1168// Add
1169//
1170
// Addition intrinsics with an explicit rounding mode, as two-source F_MATH_2
// records without extra predicates: add.{rn,rz,rm,rp}[.ftz].f32 on Float32Regs
// and add.{rn,rz,rm,rp}.f64 on Float64Regs.
1171def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
1172  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
1173def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
1174  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
1175def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
1176  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
1177def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
1178  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
1179def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
1180  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
1181def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
1182  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
1183def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
1184  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
1185def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
1186  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
1187
1188def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
1189  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
1190def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
1191  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
1192def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
1193  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
1194def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
1195  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1196
1197//
1198// Convert
1199//
1200
// f64 -> f32: each int_nvvm_d2f_<mode>[_ftz] intrinsic is rewritten to
// CVT_f32_f64 with the matching CvtRN/CvtRZ/CvtRM/CvtRP mode (or its _FTZ
// counterpart).
1201def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1202          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1203def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1204          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1205def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1206          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1207def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1208          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1209def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1210          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1211def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1212          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1213def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1214          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1215def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1216          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
1217
// f64 -> 32-bit integer: d2i uses CVT_s32_f64 and d2ui uses CVT_u32_f64, each
// with the integer-rounding mode (CvtRNI/CvtRZI/CvtRMI/CvtRPI) named by the
// intrinsic suffix.
1218def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1219          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1220def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1221          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1222def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1223          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1224def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1225          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
1226
1227def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1228          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1229def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1230          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1231def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1232          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1233def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1234          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
1235
// 32-bit integer -> f64: i2d uses CVT_f64_s32 and ui2d uses CVT_f64_u32, each
// with the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the intrinsic suffix.
1236def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1237          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1238def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1239          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1240def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1241          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1242def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1243          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
1244
1245def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1246          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1247def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1248          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1249def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1250          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1251def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1252          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
1253
// f32 -> 32-bit integer: f2i uses CVT_s32_f32 and f2ui uses CVT_u32_f32.  The
// mode is the integer-rounding mode named by the intrinsic suffix
// (CvtRNI/CvtRZI/CvtRMI/CvtRPI), with the _FTZ mode for the *_ftz intrinsics.
1254def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
1255          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1256def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1257          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1258def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1259          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1260def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1261          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1262def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1263          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1264def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1265          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1266def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1267          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1268def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1269          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1270
1271def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1272          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1273def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1274          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1275def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1276          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1277def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1278          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1279def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1280          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1281def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1282          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1283def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1284          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1285def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1286          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1287
// 32-bit integer -> f32: i2f uses CVT_f32_s32 and ui2f uses CVT_f32_u32, each
// with the CvtRN/CvtRZ/CvtRM/CvtRP mode named by the intrinsic suffix.
1288def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1289          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1290def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1291          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1292def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1293          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1294def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1295          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1296
1297def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1298          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1299def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1300          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1301def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1302          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1303def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1304          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1305
// int_nvvm_ff2bf16x2_<rnd>[_relu] -> CVT_bf16x2_f32 on two Float32Regs
// sources. The plain intrinsic uses Cvt<RND>; the _relu intrinsic follows it
// and uses Cvt<RND>_RELU.
foreach rnd = ["rn", "rz"] in {
  defvar Mode = "Cvt" # !toupper(rnd);
  def : Pat<(!cast<Intrinsic>("int_nvvm_ff2bf16x2_" # rnd)
                Float32Regs:$a, Float32Regs:$b),
            (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b,
                !cast<PatLeaf>(Mode))>;
  def : Pat<(!cast<Intrinsic>("int_nvvm_ff2bf16x2_" # rnd # "_relu")
                Float32Regs:$a, Float32Regs:$b),
            (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b,
                !cast<PatLeaf>(Mode # "_RELU"))>;
}
1314
// int_nvvm_ff2f16x2_<rnd>[_relu] -> CVT_f16x2_f32 on two Float32Regs
// sources. The plain intrinsic uses Cvt<RND>; the _relu intrinsic follows it
// and uses Cvt<RND>_RELU.
foreach rnd = ["rn", "rz"] in {
  defvar Mode = "Cvt" # !toupper(rnd);
  def : Pat<(!cast<Intrinsic>("int_nvvm_ff2f16x2_" # rnd)
                Float32Regs:$a, Float32Regs:$b),
            (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b,
                !cast<PatLeaf>(Mode))>;
  def : Pat<(!cast<Intrinsic>("int_nvvm_ff2f16x2_" # rnd # "_relu")
                Float32Regs:$a, Float32Regs:$b),
            (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b,
                !cast<PatLeaf>(Mode # "_RELU"))>;
}
1323
// int_nvvm_f2bf16_<rnd>[_relu] -> CVT_bf16_f32 on one Float32Regs source.
// The plain intrinsic uses Cvt<RND>; the _relu intrinsic follows it and uses
// Cvt<RND>_RELU.
foreach rnd = ["rn", "rz"] in {
  defvar Mode = "Cvt" # !toupper(rnd);
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2bf16_" # rnd) Float32Regs:$a),
            (CVT_bf16_f32 Float32Regs:$a, !cast<PatLeaf>(Mode))>;
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2bf16_" # rnd # "_relu") Float32Regs:$a),
            (CVT_bf16_f32 Float32Regs:$a, !cast<PatLeaf>(Mode # "_RELU"))>;
}
1332
// Instruction with its selection pattern attached inline:
// int_nvvm_f2tf32_rna on a Float32Regs operand is emitted as
// "cvt.rna.tf32.f32", and the result is placed in an Int32Regs register.
def CVT_tf32_f32
  : NVPTXInst<(outs Int32Regs:$dest),
              (ins Float32Regs:$a),
              "cvt.rna.tf32.f32 \t$dest, $a;",
              [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
1337
// int_nvvm_lohi_i2d: a single mov.b64 whose destination is a Float64Regs
// register and whose source is the brace-wrapped pair of Int32Regs operands
// $src0, $src1.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1340
// int_nvvm_d2i_lo: unpacks the Float64Regs source with mov.b64 into the pair
// {$dst, %temp}. $dst receives the first element of the pair; the second goes
// to a scratch .b32 register declared inside the emitted brace scope.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: unpacks the Float64Regs source with mov.b64 into the pair
// {%temp, $dst}. $dst receives the second element of the pair; the first goes
// to a scratch .b32 register declared inside the emitted brace scope.
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1353
// int_nvvm_f2ll_<rnd>[_ftz] -> CVT_s64_f32 on a Float32Regs source.
// For each rounding suffix the _ftz intrinsic is emitted first and uses the
// Cvt<RND>I_FTZ modifier; the plain intrinsic follows and uses Cvt<RND>I.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  defvar Mode = "Cvt" # !toupper(rnd) # "I";
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2ll_" # rnd # "_ftz") Float32Regs:$a),
            (CVT_s64_f32 Float32Regs:$a, !cast<PatLeaf>(Mode # "_FTZ"))>;
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2ll_" # rnd) Float32Regs:$a),
            (CVT_s64_f32 Float32Regs:$a, !cast<PatLeaf>(Mode))>;
}
1370
// int_nvvm_f2ull_<rnd>[_ftz] -> CVT_u64_f32 on a Float32Regs source.
// For each rounding suffix the _ftz intrinsic is emitted first and uses the
// Cvt<RND>I_FTZ modifier; the plain intrinsic follows and uses Cvt<RND>I.
foreach rnd = ["rn", "rz", "rm", "rp"] in {
  defvar Mode = "Cvt" # !toupper(rnd) # "I";
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2ull_" # rnd # "_ftz") Float32Regs:$a),
            (CVT_u64_f32 Float32Regs:$a, !cast<PatLeaf>(Mode # "_FTZ"))>;
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2ull_" # rnd) Float32Regs:$a),
            (CVT_u64_f32 Float32Regs:$a, !cast<PatLeaf>(Mode))>;
}
1387
// int_nvvm_d2ll_<rnd> -> CVT_s64_f64 on a Float64Regs source, with the
// Cvt<RND>I modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_d2ll_" # rnd) Float64Regs:$a),
            (CVT_s64_f64 Float64Regs:$a,
                !cast<PatLeaf>("Cvt" # !toupper(rnd) # "I"))>;
1396
// int_nvvm_d2ull_<rnd> -> CVT_u64_f64 on a Float64Regs source, with the
// Cvt<RND>I modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_d2ull_" # rnd) Float64Regs:$a),
            (CVT_u64_f64 Float64Regs:$a,
                !cast<PatLeaf>("Cvt" # !toupper(rnd) # "I"))>;
1405
// int_nvvm_ll2f_<rnd> -> CVT_f32_s64 on an Int64Regs source, with the
// Cvt<RND> modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_ll2f_" # rnd) Int64Regs:$a),
            (CVT_f32_s64 Int64Regs:$a, !cast<PatLeaf>("Cvt" # !toupper(rnd)))>;
1414
// int_nvvm_ull2f_<rnd> -> CVT_f32_u64 on an Int64Regs source, with the
// Cvt<RND> modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_ull2f_" # rnd) Int64Regs:$a),
            (CVT_f32_u64 Int64Regs:$a, !cast<PatLeaf>("Cvt" # !toupper(rnd)))>;
1423
// int_nvvm_ll2d_<rnd> -> CVT_f64_s64 on an Int64Regs source, with the
// Cvt<RND> modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_ll2d_" # rnd) Int64Regs:$a),
            (CVT_f64_s64 Int64Regs:$a, !cast<PatLeaf>("Cvt" # !toupper(rnd)))>;
1432
// int_nvvm_ull2d_<rnd> -> CVT_f64_u64 on an Int64Regs source, with the
// Cvt<RND> modifier that matches the intrinsic's rounding suffix.
foreach rnd = ["rn", "rz", "rm", "rp"] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_ull2d_" # rnd) Int64Regs:$a),
            (CVT_f64_u64 Int64Regs:$a, !cast<PatLeaf>("Cvt" # !toupper(rnd)))>;
1441
1442
// int_nvvm_f2h_rn[_ftz] -> CVT_f16_f32 on a Float32Regs source. The _ftz
// intrinsic is emitted first and uses CvtRN_FTZ; the plain one uses CvtRN.
foreach ftz = ["_ftz", ""] in
  def : Pat<(!cast<Intrinsic>("int_nvvm_f2h_rn" # ftz) Float32Regs:$a),
            (CVT_f16_f32 Float32Regs:$a,
                !cast<PatLeaf>("CvtRN" # !toupper(ftz)))>;
1447
1448//
1449// Bitcast
1450//
1451
// Bitcast intrinsics, each emitted as a single mov of the matching width
// (mov.b32 for the 32-bit pair, mov.b64 for the 64-bit pair).
// NOTE(review): F_MATH_1 is defined outside this chunk; from its uses here the
// argument order appears to be (asm string, result register class, source
// register class, intrinsic) -- confirm against its definition.

// 32-bit: Float32Regs <-> Int32Regs.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

// 64-bit: Float64Regs <-> Int64Regs.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1461
1462//
1463// FNS
1464//
1465
// Shared shape of every fns.b32 instruction variant.
//   ins      - input operand list; it has to name $mask, $base and $offset
//              because the asm string refers to all three.
//   Operands - DAG fragment whose value is set into the Int32Regs result $dst.
// Every variant carries the predicates hasPTX<60> and hasSM<30>.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst),
                ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
1471
// One fns.b32 variant per register/immediate combination of the three inputs.
// The three-letter suffix describes (mask, base, offset) in that order:
// 'r' = Int32Regs operand, 'i' = i32imm operand matched with `imm`.
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1488
1489//-----------------------------------
1490// Atomic Functions
1491//-----------------------------------
1492
// PatFrag wrappers that attach an address-space predicate from AS_match to an
// atomic fragment: the wrapped `frag` only matches when the corresponding
// AS_match code (global / shared / generic) accepts the node.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;
1499
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrT / ptrclass  - value type and register class of the address operand.
//   regT / regclass  - value type and register class of the data and result.
//   SpaceStr, OpcStr, TypeStr - pieces of the mnemonic, concatenated in that
//                      order after "atom".
//   IntOp            - fragment matched by both records.
//   IMMType / IMM    - operand type and DAG node used for the immediate form.
//   Pred             - predicates required by both records.
// Defines `reg` (value in a register) and `imm` (value as an immediate).
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  // Both records print the same text; only the kind of $b differs.
  defvar AsmStr =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      AsmStr,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;

  def imm : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, IMMType:$b),
                      AsmStr,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
            Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp once per pointer width: `p32` takes the address
// as i32 in Int32Regs, `p64` as i64 in Int64Regs. Every other parameter is
// forwarded unchanged. Pred defaults to an empty predicate list.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}
1521
// Two-operand atomic that negates its value operand before the atomic op.
// The emitted text is a brace-scoped sequence that
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into `temp`,
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp`.
// Here TypeStr is only the bit width (it is appended after ".s" / ".u").
// Only a register form (`reg`) is defined.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  defvar AsmStr =
      "{{ \n\t" #
      ".reg \t.s" # TypeStr # " temp; \n\t" #
      "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
      "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
          " \t$dst, [$addr], temp; \n\t" #
      "}}";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      AsmStr,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp once per pointer width: `p32` takes the
// address as i32 in Int32Regs, `p64` as i64 in Int64Regs. Pred defaults to an
// empty predicate list.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}
1544
// Three-operand atomic (address + two values $b, $c) for a single pointer
// width. Four records cover every register/immediate combination:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType and are matched with the `imm` node.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  // All four records print the same text.
  defvar AsmStr =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;";

  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b, regclass:$c),
                      AsmStr,
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b),
                                   (regT regclass:$c)))]>,
            Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                       AsmStr,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    imm:$b,
                                    (regT regclass:$c)))]>,
             Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                       AsmStr,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    (regT regclass:$b),
                                    imm:$c))]>,
             Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                       AsmStr,
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
             Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp once per pointer width: `p32` takes the address
// as i32 in Int32Regs, `p64` as i64 in Int64Regs. Pred defaults to an empty
// predicate list.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
}
1581
1582// atom_add
1583
// Address-space-qualified fragments for atomic add.
// Integer forms: atomic_load_add_<sz>_{g,s,gen} wrap atomic_load_add_<sz>
// with the global / shared / generic address-space check.
foreach sz = ["32", "64"] in {
  defvar AddOp = !cast<SDPatternOperator>("atomic_load_add_" # sz);
  def atomic_load_add_#sz#_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (AddOp node:$a, node:$b)>;
  def atomic_load_add_#sz#_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (AddOp node:$a, node:$b)>;
  def atomic_load_add_#sz#_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (AddOp node:$a, node:$b)>;
}

// Floating-point forms: no size in the name; they wrap atomic_load_fadd.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
1602
// atom.add instructions. Name suffix: G / S / GEN select the global, shared
// or generic fragment (space strings ".global", ".shared", ""). The
// GEN_*_USE_G records match the generic fragment but print ".global".

// Integer, 32-bit (.u32).
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".add", atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_gen, i32imm,
  imm>;

// Integer, 64-bit (.u64).
defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".add", atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_gen, i64imm,
  imm>;

// Floating point, f32 (.f32); immediates are matched with fpimm.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
  f32, Float32Regs, ".global", ".f32", ".add", atomic_load_add_g, f32imm,
  fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
  f32, Float32Regs, ".shared", ".f32", ".add", atomic_load_add_s, f32imm,
  fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
  f32, Float32Regs, "", ".f32", ".add", atomic_load_add_gen, f32imm, fpimm>;

// Floating point, f64 (.f64); guarded by hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<
  f64, Float64Regs, ".global", ".f64", ".add", atomic_load_add_g, f64imm,
  fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
  f64, Float64Regs, ".shared", ".f64", ".add", atomic_load_add_s, f64imm,
  fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
  f64, Float64Regs, "", ".f64", ".add", atomic_load_add_gen, f64imm, fpimm,
  [hasAtomAddF64]>;
1634
1635// atom_sub
1636
// Address-space-qualified fragments for atomic sub:
// atomic_load_sub_<sz>_{g,s,gen} wrap atomic_load_sub_<sz> with the
// global / shared / generic address-space check.
foreach sz = ["32", "64"] in {
  defvar SubOp = !cast<SDPatternOperator>("atomic_load_sub_" # sz);
  def atomic_load_sub_#sz#_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (SubOp node:$a, node:$b)>;
  def atomic_load_sub_#sz#_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (SubOp node:$a, node:$b)>;
  def atomic_load_sub_#sz#_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (SubOp node:$a, node:$b)>;
}
1649
// Atomic sub instructions, built on F_ATOMIC_2_NEG with the ".add" opcode
// string: the value operand is negated and then atomically added. The type
// string passed here is only the bit width ("32" / "64"). The GEN_*_USE_G
// records match the generic fragment but print ".global".
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, "", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".shared", "32", ".add", atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".shared", "64", ".add", atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, "", "64", ".add", atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_64_gen>;
1666
1667// atom_swap
1668
// Address-space-qualified fragments for atomic swap:
// atomic_swap_<sz>_{g,s,gen} wrap atomic_swap_<sz> with the
// global / shared / generic address-space check.
foreach sz = ["32", "64"] in {
  defvar SwapOp = !cast<SDPatternOperator>("atomic_swap_" # sz);
  def atomic_swap_#sz#_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (SwapOp node:$a, node:$b)>;
  def atomic_swap_#sz#_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (SwapOp node:$a, node:$b)>;
  def atomic_swap_#sz#_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (SwapOp node:$a, node:$b)>;
}
1681
// atom.exch instructions (type strings .b32 / .b64). G / S / GEN select the
// global, shared or generic fragment; the GEN_*_USE_G records match the
// generic fragment but print ".global".
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".b32", ".exch", atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".b64", ".exch", atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
1698
1699// atom_max
1700
// Address-space-qualified fragments for atomic max / umax:
// atomic_load_<op>_<sz>_{g,s,gen} wrap atomic_load_<op>_<sz> with the
// global / shared / generic address-space check.
foreach op = ["max", "umax"] in
  foreach sz = ["32", "64"] in {
    defvar MaxOp = !cast<SDPatternOperator>("atomic_load_" # op # "_" # sz);
    def atomic_load_#op#_#sz#_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (MaxOp node:$a, node:$b)>;
    def atomic_load_#op#_#sz#_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (MaxOp node:$a, node:$b)>;
    def atomic_load_#op#_#sz#_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (MaxOp node:$a, node:$b)>;
  }
1725
// atom.max instructions. MAX records use the signed type strings (.s32/.s64),
// UMAX records the unsigned ones (.u32/.u64). All 64-bit records carry the
// hasSM<32> predicate. The GEN_*_USE_G records match the generic fragment but
// print ".global".

// Signed, 32-bit.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".s32", ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_gen, i32imm,
  imm>;

// Signed, 64-bit.
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".s64", ".max", atomic_load_max_64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_gen, i64imm,
  imm, [hasSM<32>]>;

// Unsigned, 32-bit.
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_g, i32imm,
  imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".max", atomic_load_umax_32_s, i32imm,
  imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_gen, i32imm,
  imm>;

// Unsigned, 64-bit.
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_g, i64imm,
  imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".max", atomic_load_umax_64_s, i64imm,
  imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_gen, i64imm,
  imm, [hasSM<32>]>;
1758
1759// atom_min
1760
// Address-space-qualified fragments for atomic min / umin:
// atomic_load_<op>_<sz>_{g,s,gen} wrap atomic_load_<op>_<sz> with the
// global / shared / generic address-space check.
foreach op = ["min", "umin"] in
  foreach sz = ["32", "64"] in {
    defvar MinOp = !cast<SDPatternOperator>("atomic_load_" # op # "_" # sz);
    def atomic_load_#op#_#sz#_g
      : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (MinOp node:$a, node:$b)>;
    def atomic_load_#op#_#sz#_s
      : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (MinOp node:$a, node:$b)>;
    def atomic_load_#op#_#sz#_gen
      : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (MinOp node:$a, node:$b)>;
  }
1785
// atom.min instructions. MIN records use the signed type strings (.s32/.s64),
// UMIN records the unsigned ones (.u32/.u64). All 64-bit records carry the
// hasSM<32> predicate. The GEN_*_USE_G records match the generic fragment but
// print ".global".

// Signed, 32-bit.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".s32", ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_gen, i32imm,
  imm>;

// Signed, 64-bit.
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".s64", ".min", atomic_load_min_64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_gen, i64imm,
  imm, [hasSM<32>]>;

// Unsigned, 32-bit.
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_g, i32imm,
  imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".min", atomic_load_umin_32_s, i32imm,
  imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_gen, i32imm,
  imm>;

// Unsigned, 64-bit.
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_g, i64imm,
  imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".min", atomic_load_umin_64_s, i64imm,
  imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_gen, i64imm,
  imm, [hasSM<32>]>;
1818
1819// atom_inc  atom_dec
1820
// Address-space-qualified fragments for atomic inc / dec. Unlike the other
// atomics in this section, these wrap NVVM intrinsics
// (int_nvvm_atomic_load_<op>_32); only a 32-bit form is defined.
foreach op = ["inc", "dec"] in {
  defvar IncDecOp = !cast<Intrinsic>("int_nvvm_atomic_load_" # op # "_32");
  def atomic_load_#op#_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (IncDecOp node:$a, node:$b)>;
  def atomic_load_#op#_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (IncDecOp node:$a, node:$b)>;
  def atomic_load_#op#_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (IncDecOp node:$a, node:$b)>;
}
1833
// atom.inc / atom.dec instructions (type string .u32). G / S / GEN select the
// global, shared or generic fragment; the GEN_32_USE_G records match the
// generic fragment but print ".global".
defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".inc", atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_gen, i32imm,
  imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".dec", atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".dec", atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_gen, i32imm,
  imm>;
1850
1851// atom_and
1852
// Address-space-qualified fragments for atomic and:
// atomic_load_and_<sz>_{g,s,gen} wrap atomic_load_and_<sz> with the
// global / shared / generic address-space check.
foreach sz = ["32", "64"] in {
  defvar AndOp = !cast<SDPatternOperator>("atomic_load_and_" # sz);
  def atomic_load_and_#sz#_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (AndOp node:$a, node:$b)>;
  def atomic_load_and_#sz#_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (AndOp node:$a, node:$b)>;
  def atomic_load_and_#sz#_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (AndOp node:$a, node:$b)>;
}
1865
// atom.and instructions (type strings .b32 / .b64). The 64-bit records carry
// the hasSM<32> predicate. The GEN_*_USE_G records match the generic fragment
// but print ".global".
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".and", atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".b32", ".and", atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".b32", ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".and", atomic_load_and_32_gen, i32imm,
  imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".and", atomic_load_and_64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".b64", ".and", atomic_load_and_64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".b64", ".and", atomic_load_and_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".and", atomic_load_and_64_gen, i64imm,
  imm, [hasSM<32>]>;
1882
1883// atom_or
1884
// Address-space-qualified fragments for atomic or:
// atomic_load_or_<sz>_{g,s,gen} wrap atomic_load_or_<sz> with the
// global / shared / generic address-space check.
foreach sz = ["32", "64"] in {
  defvar OrOp = !cast<SDPatternOperator>("atomic_load_or_" # sz);
  def atomic_load_or_#sz#_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (OrOp node:$a, node:$b)>;
  def atomic_load_or_#sz#_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (OrOp node:$a, node:$b)>;
  def atomic_load_or_#sz#_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (OrOp node:$a, node:$b)>;
}
1897
// atom.or instructions (type strings .b32 / .b64). The 64-bit records carry
// the hasSM<32> predicate. The GEN_*_USE_G records match the generic fragment
// but print ".global".
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".b32", ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".b32", ".or", atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".or", atomic_load_or_64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".b64", ".or", atomic_load_or_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".or", atomic_load_or_64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".b64", ".or", atomic_load_or_64_s, i64imm, imm,
  [hasSM<32>]>;
1914
// atom_xor
//
// Address-space-checked fragments over atomic_load_xor_{32,64}: the _g, _s and
// _gen suffixes use ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and
// ATOMIC_GENERIC_CHK respectively.

def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;

// atom.xor instruction variants.  The *_USE_G variants pair the generic (_gen)
// fragment with the ".global" space string; 64-bit variants require hasSM<32>.
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
  atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
  atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
  atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
  ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
1946
// atom_cas
//
// Three-operand fragments ($a, $b, $c) over atomic_cmp_swap_{32,64}: the _g,
// _s and _gen suffixes use ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and
// ATOMIC_GENERIC_CHK respectively.

def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

// atom.cas instruction variants built with F_ATOMIC_3.  The *_USE_G variants
// pair the generic (_gen) fragment with the ".global" space string.  Unlike
// the and/or/xor groups, no extra predicate list is passed for 64-bit.
defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm>;
1978
// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction record for the two- and three-operand
// forms: a single `regclass:$result` output typed as `regT`, the caller's
// `ins` operand list, the caller's `Operands` dag as the value being set, and
// `Preds` attached through Requires.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
      : NVPTXInst<(outs regclass:$result), ins,
                  AsmStr,
                  [(set (regT regclass:$result), Operands)]>,
        Requires<Preds>;
1991
// Define instruction variants for all addressing modes.
//
// Emits four anonymous ATOM23_impl records for a two-operand atomic: the
// address ($src) in either Int32Regs or Int64Regs, crossed with $b as either
// a register or an immediate.  The register forms are given
// AddedComplexity = 1; the immediate forms keep the default.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}
2015
// Three-operand counterpart of ATOM2P_impl.  Emits eight anonymous
// ATOM23_impl records: the address ($src) in Int32Regs or Int64Regs, crossed
// with every register/immediate combination of $b and $c.  AddedComplexity is
// 2 for reg/reg, 1 when exactly one of $b/$c is an immediate, and the default
// when both are immediates.
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
  }
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
  }
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
2050
// Constructs intrinsic name and instruction asm strings.
//
// Asm string: "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;"
//   - the space suffix is omitted when SpaceStr is "gen";
//   - the scope suffix is omitted when ScopeStr is "gpu".
// Intrinsic:  int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], looked up by
//   name with !cast; the scope suffix is omitted only when ScopeStr is empty.
// NOTE(review): the two scope conditions differ ("gpu" vs. empty string).
// The callers visible in this file pass only "cta" and "sys", for which both
// conditions append the suffix.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand version of the name/asm-string builder.
//
// Asm string: "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b, $c;"
//   - the space suffix is omitted when SpaceStr is "gen";
//   - the scope suffix is omitted when ScopeStr is "gpu".
// Intrinsic:  int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], looked up by
//   name with !cast; the scope suffix is omitted only when ScopeStr is empty.
// NOTE(review): the two scope conditions differ ("gpu" vs. empty string).
// The callers visible in this file pass only "cta" and "sys", for which both
// conditions append the suffix.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2080
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers, so the only
// instantiation is ATOM2N_impl with SpaceStr = "gen".
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
   defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                            regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Address-space fan-out for three-operand atomics.  Only the generic space is
// instantiated: ATOM3N_impl with SpaceStr = "gen".
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
   defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                            regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2095
// Constructs variants for different scopes of atomic op.
// Instantiates the "cta" and "sys" scopes; each appends hasAtomScope to the
// caller-supplied predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
   // .gpu scope is default and is currently covered by existing
   // atomics w/o explicitly specified scope.
   defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                           regT, regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
   defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                           regT, regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
}
// Scope fan-out for three-operand atomics.  Instantiates the "cta" and "sys"
// scopes; each appends hasAtomScope to the caller-supplied predicate list.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
   // No need to define ".gpu"-scoped atomics.  They do the same thing
   // as the regular, non-scoped atomics defined elsewhere.
   defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                           regT, regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
   defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                           regT, regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
}
2121
// atom.add
// Scoped variants for s32, u32, u64, f32 and f64.  The integer types use the
// "i" intrinsic type tag and the float types use "f"; only f64 carries an
// extra predicate (hasAtomAddF64).
multiclass ATOM2_add_impl<string OpStr> {
   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
   defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                            []>;
   defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
                            [hasAtomAddF64]>;
}
2132
// atom.{and,or,xor}
// Scoped b32 and b64 variants; the b64 form requires hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
   defm _b32  : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _b64  : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
                            [hasAtomBitwise64]>;
}
2139
// atom.exch
// Scoped b32 and b64 variants, with no predicates beyond those added by
// ATOM2S_impl.
multiclass ATOM2_exch_impl<string OpStr> {
   defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
2145
// atom.{min,max}
// Scoped s32, u32, s64 and u64 variants; both 64-bit forms require
// hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _s64  : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
                            [hasAtomMinMax64]>;
   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
                            [hasAtomMinMax64]>;
}
2155
// atom.{inc,dec}
// Only a scoped u32 variant is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
}
2160
// atom.cas
// Scoped three-operand b32 and b64 variants, built through ATOM3S_impl.
multiclass ATOM3_cas_impl<string OpStr> {
   defm _b32  : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _b64  : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
2166
// Instantiate the scoped atomics.  The string argument is the operation name
// spliced into both the asm mnemonic and the intrinsic name.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2177
//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One "ldu.global.<TyStr>" instruction per addressing form: 32-bit register,
// 64-bit register, symbol (imemAny), and reg+imm (MEMri / MEMri64).  TyStr
// carries the type suffix and the operand text.  The records have no
// selection pattern and all require hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ldu.global.", TyStr),
                        []>, Requires<[hasLDU]>;
  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ldu.global.", TyStr),
                        []>, Requires<[hasLDU]>;
}
2204
// Scalar ldu instantiations.  Note that the i8 variant uses Int16Regs as its
// result register class.
defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2211
// vector

// Elementized vector ldu
// Two-result form: each record writes $dst1 and $dst2 of `regclass` and takes
// one address operand (32/64-bit register, MEMri/MEMri64, or imemAny).  The
// records have no selection pattern.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int32Regs:$src),
                     !strconcat("ldu.global.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int64Regs:$src),
                     !strconcat("ldu.global.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri:$src),
                     !strconcat("ldu.global.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri64:$src),
                     !strconcat("ldu.global.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins imemAny:$src),
                     !strconcat("ldu.global.", TyStr), []>;
}
2232
// Four-result elementized vector ldu: each record writes $dst1..$dst4 of
// `regclass` and takes one address operand (32/64-bit register,
// MEMri/MEMri64, or imemAny).  The records have no selection pattern.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins Int32Regs:$src),
               !strconcat("ldu.global.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins Int64Regs:$src),
               !strconcat("ldu.global.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins MEMri:$src),
               !strconcat("ldu.global.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins MEMri64:$src),
               !strconcat("ldu.global.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins imemAny:$src),
               !strconcat("ldu.global.", TyStr), []>;
}
2250
// Vector ldu instantiations.  The doubled braces in the asm strings produce
// literal '{' and '}' around the destination list.  The 8-bit element
// variants use Int16Regs; v4f16 uses a .b16 type with Int16Regs and v4f16x2
// uses a .b32 type with Int32Regs.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Float32Regs>;
2280
2281
//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// One "ld.global.nc.<TyStr>" instruction per addressing form: 32-bit register,
// 64-bit register, symbol (imemAny), and reg+imm (MEMri / MEMri64).  TyStr
// carries the type suffix and the operand text.  The records have no
// selection pattern and all require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
                        []>, Requires<[hasLDG]>;
  def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
  def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
  def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr),
                        []>, Requires<[hasLDG]>;
}
2307
// Scalar ldg instantiations.  Note that the i8 variant uses Int16Regs as its
// result register class.
defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2320
// vector

// Elementized vector ldg
// Two-result form: each record writes $dst1 and $dst2 of `regclass` and takes
// one address operand (32/64-bit register, MEMri/MEMri64, or imemAny).  The
// records have no selection pattern.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int32Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int64Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri64:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins imemAny:$src),
                     !strconcat("ld.global.nc.", TyStr), []>;
}
2341
// Four-result elementized vector ldg: each record writes $dst1..$dst4 of
// `regclass` and takes one address operand (32/64-bit register,
// MEMri/MEMri64, or imemAny).  The records have no selection pattern.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr), []>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr), []>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr), []>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr), []>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins imemAny:$src),
               !strconcat("ld.global.nc.", TyStr), []>;
}
2359
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ldg instantiations.  The doubled braces in the asm strings produce
// literal '{' and '}' around the destination list; the 8-bit element variants
// use Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2381
2382
// Instructions selecting `Intrin` to "cvta.<Str>":
//   _yes      : 32-bit source and result (cvta.<Str>.u32).
//   _yes_64   : 64-bit source and result (cvta.<Str>.u64).
//   _yes_6432 : 32-bit source, 64-bit result; only with useShortPtr.  The
//               source is first widened with cvt.u64.u32 into a scoped
//               temporary (%tmp), then converted with cvta.<Str>.u64.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
          !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
          !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
          "{{ .reg .b64 %tmp;\n\t"
          #"  cvt.u64.u32 \t%tmp, $src;\n\t"
          #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2397
// Instructions selecting `Intrin` to "cvta.to.<Str>":
//   _yes      : 32-bit source and result (cvta.to.<Str>.u32).
//   _yes_64   : 64-bit source and result (cvta.to.<Str>.u64).
//   _yes_3264 : 64-bit source, 32-bit result; only with useShortPtr.  The
//               conversion runs at 64 bits into a scoped temporary (%tmp),
//               which is then narrowed with cvt.u32.u64.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
          !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
          !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
   def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
          "{{ .reg .b64 %tmp;\n\t"
          #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
          #"  cvt.u32.u64 \t$result, %tmp; }}",
      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2412
// Conversions to a generic address: local, shared, global, constant.
defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

// Conversions from a generic address: local, shared, global, constant.
defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2422
2423
// nvvm.ptr.gen.to.param
// Selected to a plain register move (mov.u32 / mov.u64) of the source.
def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
                              (ins Int32Regs:$src),
                              "mov.u32 \t$result, $src;",
                              [(set Int32Regs:$result,
                                (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
                              (ins Int64Regs:$src),
                              "mov.u64 \t$result, $src;",
                              [(set Int64Regs:$result,
                                (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2435
2436
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to a single register-to-register
// mov of the matching type: .b16/.b32/.b64 for the integer forms, .f32/.f64
// for float/double, and .u32/.u64 for the two pointer widths.
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
                             "mov.b16 \t$r, $s;",
                             [(set Int16Regs:$r,
                               (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                             "mov.b32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                             "mov.b64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
                             "mov.f32 \t$r, $s;",
                             [(set Float32Regs:$r,
                               (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
                             "mov.f64 \t$r, $s;",
                             [(set Float64Regs:$r,
                               (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr Int64Regs:$s))]>;
2466
// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2477
2478
// Fold a local->generic->local round trip on a parameter symbol into a
// single move:
//
//   MoveParam        %r1, param
//   ptr_local_to_gen %r2, %r1
//   ptr_gen_to_local %r3, %r2
//   ->
//   mov %r1, param

// @TODO: Revisit this.  There is a type contradiction between iPTRAny and
// iPTR for the addr defs, so the move_sym instructions are not currently
// defined.  However, we can use the ptr variants and the asm printer will do
// the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                (MoveParam texternalsym:$src)))),
               (nvvm_move_ptr64  texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
                (MoveParam texternalsym:$src)))),
               (nvvm_move_ptr32  texternalsym:$src)>;
2495
// Moves the imem operand $src into a 64-bit register.  It has no selection
// pattern, so it is only created explicitly by C++ code.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
2499
//-----------------------------------
// Compiler warning / error intrinsics
// - Codegen ignores them: each one is emitted only as a PTX comment line.
//-----------------------------------

// int_nvvm_compiler_warn, 32-bit and 64-bit operand forms.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;

// int_nvvm_compiler_error, 32-bit and 64-bit operand forms.
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2517
2518
// isspacep
//
// Defines the `isspacep.<suffix>` instruction for one address-space query
// intrinsic, once per operand width:
//   _32 - operand in Int32Regs
//   _64 - operand in Int64Regs
// Both write an Int1Regs result.  `Preds` (empty by default) is attached to
// both definitions through Requires.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  def _32
    : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
                "isspacep." # suffix # "\t$d, $a;",
                [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
      Requires<Preds>;

  def _64
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "isspacep." # suffix # "\t$d, $a;",
                [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
      Requires<Preds>;
}
2531
// One ISSPACEP instantiation per queried address space.  Only the const and
// shared::cluster forms carry extra predicates.
defm isspace_const
  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global
  : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local
  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared
  : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;
2539
// Special register reads: copies a SpecialRegs operand into a 32-bit
// register.  No pattern is attached here; intrinsics are mapped onto this
// instruction with separate Pat records.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;",
              []>;
2544
// Map each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) onto a
// MOV_SPECIAL read of the matching ENVREG<N> register.  The loop expands to
// the same 32 anonymous patterns, in ascending order of N.
foreach i = 0...31 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2577
2578
// Rotate builtin support.
//
// 32-bit rotate for targets with hasHWROT32: int_nvvm_rotate_b32 becomes a
// single shf.l.wrap.b32 whose two data operands are both $src.

// Rotate amount given as an immediate.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

// Rotate amount held in a register.
def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;
2596
// 32-bit rotate for targets with noHWROT32: defer to the software rotate
// instructions defined elsewhere.

// Immediate amount.  ROT32imm_sw takes the amount plus a second immediate
// derived from it by SUB_FRM_32 (presumably 32 - amt; confirm at its
// definition).
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

// Register amount.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2604
// Split a 64-bit register into one of its 32-bit halves.  Each instruction
// unpacks $src with a vector-destination mov.b64 and sends the unwanted
// element to a scratch register %dummy declared inside the emitted block.
// The first unpacked element is the low half and the second is the high half.
let hasSideEffects = false in {
  // Keeps the first (low) element.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t" #
                ".reg .b32 %dummy;\n\t" #
                "mov.b64 \t{$dst,%dummy}, $src;\n\t" #
                "}}",
                []>;

  // Keeps the second (high) element.
  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t" #
                ".reg .b32 %dummy;\n\t" #
                "mov.b64 \t{%dummy,$dst}, $src;\n\t" #
                "}}",
                []>;
}
2620
// Builds a 64-bit register from two 32-bit registers with a vector-source
// mov.b64; $lo is the first element and $hi the second.
let hasSideEffects = false in
def PACK_TWO_INT32
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int32Regs:$lo, Int32Regs:$hi),
              "mov.b64 \t$dst, {{$lo, $hi}};",
              []>;
2626
// int_nvvm_swap_lo_hi_b64: repack $src with its two 32-bit halves exchanged
// (the old high half goes into the low slot and vice versa).
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
            (GET_HI_INT64 Int64Regs:$src),
            (GET_LO_INT64 Int64Regs:$src))>;
2630
// 32-bit funnel shifts in wrap mode (requires >= sm_32).  An out-of-range
// shift amount does not trap, so the instructions are marked as having no
// side effects.  None of them has a selection pattern; they are only used
// as building blocks in the output side of other patterns.
let hasSideEffects = false in {
  // Left funnel shift, immediate amount.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;

  // Left funnel shift, register amount.
  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;

  // Right funnel shift, immediate amount.
  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;

  // Right funnel shift, register amount.
  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
                []>,
      Requires<[hasHWROT32]>;
}
2658
// Hardware 64-bit rotate (int_nvvm_rotate_b64) for targets with hasHWROT32.
// $src is split into halves, each result half is produced by one left
// funnel shift across the two halves, and the halves are packed back.
// The first PACK_TWO_INT32 operand is the low result half.

// Immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_L_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2675
2676
// Hardware 64-bit rotate right (int_nvvm_rotate_right_b64) for targets with
// hasHWROT32.  Same split / funnel-shift / repack scheme as the left
// rotate, using the right funnel shift and the opposite operand order.

// Immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_R_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2692
// Software 64-bit rotates for targets with noHWROT32.

// Rotate left, immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, immediate amount: the same ROT64imm_sw instruction as the
// left rotate, with its two amount operands exchanged.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2706
2707
2708//-----------------------------------
2709// Texture Intrinsics
2710//-----------------------------------
2711
2712// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2713// also defined in NVPTXReplaceImageHandles.cpp
2714
2715// texmode_independent
2716let IsTex = true, IsTexModeUnified = false in {
2717// Texture fetch instructions using handles
2718
// 1D texture fetch.  Produces four `outtype` results ($r, $g, $b, $a) from
// the operands in `texsamp` ($t and $s) followed by one `intype`
// coordinate $x.  No selection pattern is attached.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x\\}];",
                []>;

// One variant per operand kind of $t / $s: R = Int64Regs register,
// I = i64imm immediate.  The first letter describes $t, the second $s.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2744
// 1D texture fetch with an explicit level-of-detail operand.  Same as the
// plain 1D form plus a trailing `intype` operand $lod.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x\\}], $lod;",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2771
// 1D texture fetch with explicit gradients.  Same as the plain 1D form
// plus two trailing `intype` operands, $gradx and $grady, each printed as
// a one-element vector.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x\\}]," #
                       " \\{$gradx\\}, \\{$grady\\};",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2799
// 1D array texture fetch.  Like the plain 1D form, but the coordinate
// vector starts with an Int32Regs operand $l ahead of the `intype`
// coordinate $x.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x\\}];",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2832
// 1D array texture fetch with an explicit level-of-detail operand: the
// 1D array form plus a trailing `intype` operand $lod.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x\\}], $lod;",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2860
// 1D array texture fetch with explicit gradients: the 1D array form plus
// two trailing `intype` operands, $gradx and $grady, each printed as a
// one-element vector.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x,
                          intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x\\}]," #
                       " \\{$gradx\\}, \\{$grady\\};",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2889
// 2D texture fetch.  Produces four `outtype` results ($r, $g, $b, $a) from
// the operands in `texsamp` ($t and $s) followed by two `intype`
// coordinates, $x and $y.  No selection pattern is attached.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x, $y\\}];",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2912
// 2D texture fetch with an explicit level-of-detail operand: the plain 2D
// form plus a trailing `intype` operand $lod.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x, $y\\}], $lod;",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2940
// 2D texture fetch with explicit gradients: the plain 2D form plus four
// trailing `intype` operands, printed as the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x, $y\\}]," #
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2970
// 2D array texture fetch.  The coordinate vector starts with an Int32Regs
// operand $l, followed by the `intype` coordinates $x and $y.  $y is
// printed twice so the emitted vector has four elements (presumably
// padding for an ignored fourth slot; confirm against the PTX tex syntax).
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x, $y, $y\\}];",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3004
// 2D array texture fetch with an explicit level-of-detail operand: the 2D
// array form plus a trailing `intype` operand $lod.  As in the 2D array
// form, $y is printed twice to give a four-element coordinate vector.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3033
// 2D array texture fetch with explicit gradients: the 2D array form plus
// four trailing `intype` operands, printed as the two-element vectors
// {$gradx0, $gradx1} and {$grady0, $grady1}.  As in the 2D array form, $y
// is printed twice to give a four-element coordinate vector.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$l, $x, $y, $y\\}]," #
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3064
// 3D texture fetch.  Takes three `intype` coordinates, $x, $y and $z.  $z
// is printed twice so the emitted coordinate vector has four elements
// (presumably padding for an ignored fourth slot; confirm against the PTX
// tex syntax).
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x, $y, $z, $z\\}];",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3091
// 3D texture fetch with an explicit level-of-detail operand: the plain 3D
// form plus a trailing `intype` operand $lod.  As in the plain 3D form, $z
// is printed twice to give a four-element coordinate vector.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}," #
                       " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// Variants by operand kind of $t / $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3120
// Base record for the TEX_3D_GRAD instructions. `texsamp` is prepended to the
// input operands and must provide $t and $s. The coordinate list and both
// gradient lists repeat their third component so each prints four entries.
class TEX_3D_GRAD_base<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype,
                       dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}], "
             "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
             "\\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
3134
// Instantiates TEX_3D_GRAD_base once per ($t, $s) operand combination:
// R = Int64Regs operand, I = i64imm operand (first letter is $t, second $s).
multiclass TEX_3D_GRAD<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3146
// tex.grad.3d fetches with f32 coordinates, one per result type.
defm TEX_3D_F32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3153
// Base record for the TEX_CUBE instructions. `texsamp` is prepended to the
// input operands and must provide the $t and $s operands named in the asm
// string. $z is printed twice so the coordinate list has four entries.
class TEX_CUBE_base<string inst,
                    NVPTXRegClass outtype,
                    NVPTXRegClass intype,
                    dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, "
             "\\{$x, $y, $z, $z\\}];",
      []>;
3162
// Instantiates TEX_CUBE_base once per ($t, $s) operand combination:
// R = Int64Regs operand, I = i64imm operand (first letter is $t, second $s).
multiclass TEX_CUBE<string inst,
                    NVPTXRegClass outtype,
                    NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3173
// tex.cube fetches with f32 coordinates, one per result type.
defm TEX_CUBE_F32_F32 : TEX_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 : TEX_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 : TEX_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3180
// Base record for the TEX_CUBE_LEVEL instructions: the TEX_CUBE operand
// layout plus a trailing $lod operand. `texsamp` must provide $t and $s.
class TEX_CUBE_LEVEL_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, "
             "\\{$x, $y, $z, $z\\}], $lod;",
      []>;
3190
// Instantiates TEX_CUBE_LEVEL_base once per ($t, $s) operand combination:
// R = Int64Regs operand, I = i64imm operand (first letter is $t, second $s).
multiclass TEX_CUBE_LEVEL<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3202
// tex.level.cube fetches with f32 coordinates, one per result type.
defm TEX_CUBE_F32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3209
// Base record for the TEX_CUBE_ARRAY instructions. The coordinate list is
// {$l, $x, $y, $z}; $l is always Int32Regs regardless of `intype`
// (presumably the array index -- confirm against the PTX ISA).
// `texsamp` must provide the $t and $s operands.
class TEX_CUBE_ARRAY_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, "
             "\\{$l, $x, $y, $z\\}];",
      []>;
3219
// Instantiates TEX_CUBE_ARRAY_base once per ($t, $s) operand combination:
// R = Int64Regs operand, I = i64imm operand (first letter is $t, second $s).
multiclass TEX_CUBE_ARRAY<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3231
// tex.acube fetches with f32 coordinates, one per result type.
defm TEX_CUBE_ARRAY_F32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3238
// Base record for the TEX_CUBE_ARRAY_LEVEL instructions: coordinate list
// {$l, $x, $y, $z} ($l is always Int32Regs) followed by a trailing $lod.
// `texsamp` must provide the $t and $s operands.
class TEX_CUBE_ARRAY_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, "
             "\\{$l, $x, $y, $z\\}], $lod;",
      []>;
3248
// Instantiates TEX_CUBE_ARRAY_LEVEL_base once per ($t, $s) operand
// combination: R = Int64Regs operand, I = i64imm operand.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3260
// tex.level.acube fetches with f32 coordinates, one per result type.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3270
// Base record for the TLD4_*_2D instructions: four results ($v0..$v3) from a
// two-component coordinate {$x, $y}. `texsamp` is prepended to the input
// operands and must provide the $t and $s operands named in the asm string.
class TLD4_2D_base<string inst,
                   NVPTXRegClass outtype,
                   NVPTXRegClass intype,
                   dag texsamp>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\},"
             " [$t, $s, \\{$x, $y\\}];",
      []>;
3278
// Instantiates TLD4_2D_base once per ($t, $s) operand combination:
// R = Int64Regs operand, I = i64imm operand (first letter is $t, second $s).
multiclass TLD4_2D<string inst,
                   NVPTXRegClass outtype,
                   NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3289
// tld4.{r,g,b,a}.2d with f32 coordinates, grouped by result type.

// f32 results.
defm TLD4_R_2D_F32_F32 : TLD4_2D<
    "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32 : TLD4_2D<
    "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32 : TLD4_2D<
    "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32 : TLD4_2D<
    "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results.
defm TLD4_R_2D_S32_F32 : TLD4_2D<
    "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32 : TLD4_2D<
    "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32 : TLD4_2D<
    "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32 : TLD4_2D<
    "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results.
defm TLD4_R_2D_U32_F32 : TLD4_2D<
    "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32 : TLD4_2D<
    "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32 : TLD4_2D<
    "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32 : TLD4_2D<
    "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3316
3317}
3318
3319
3320// texmode_unified
3321let IsTex = true, IsTexModeUnified = true in {
3322// Texture fetch instructions using handles
3323
// Base record for the TEX_UNIFIED_1D instructions. `tex` is prepended to the
// input operands and must provide the $t operand named in the asm string;
// unlike the non-unified forms there is no $s operand.
class TEX_UNIFIED_1D_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x\\}];",
      []>;
3331
// Emits the two $t operand forms of TEX_UNIFIED_1D_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3337
// Unified-mode tex.1d fetches. Record names read
// <result type>_<coordinate type>; s32 and u32 both use Int32Regs.
defm TEX_UNIFIED_1D_F32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3350
// Base record for the TEX_UNIFIED_1D_LEVEL instructions: a single coordinate
// $x followed by a trailing $lod operand. `tex` must provide $t.
class TEX_UNIFIED_1D_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x\\}], $lod;",
      []>;
3358
// Emits the two $t operand forms of TEX_UNIFIED_1D_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D_LEVEL<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3364
// Unified-mode tex.level.1d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_1D_F32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3371
// Base record for the TEX_UNIFIED_1D_GRAD instructions: a single coordinate
// $x followed by one-element {$gradx} and {$grady} lists.
// `tex` must provide the $t operand.
class TEX_UNIFIED_1D_GRAD_base<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype,
                               dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}],"
             " \\{$gradx\\}, \\{$grady\\};",
      []>;
3380
// Emits the two $t operand forms of TEX_UNIFIED_1D_GRAD_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D_GRAD<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3386
// Unified-mode tex.grad.1d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_1D_F32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3393
// Base record for the TEX_UNIFIED_1D_ARRAY instructions. The coordinate list
// is {$l, $x}; $l is always Int32Regs regardless of `intype` (presumably the
// array index -- confirm against the PTX ISA). `tex` must provide $t.
class TEX_UNIFIED_1D_ARRAY_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}];",
      []>;
3401
// Emits the two $t operand forms of TEX_UNIFIED_1D_ARRAY_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D_ARRAY<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3407
// Unified-mode tex.a1d fetches. Record names read
// <result type>_<coordinate type>; s32 and u32 both use Int32Regs.
defm TEX_UNIFIED_1D_ARRAY_F32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3420
// Base record for the TEX_UNIFIED_1D_ARRAY_LEVEL instructions: coordinate
// list {$l, $x} ($l is always Int32Regs) followed by a trailing $lod.
// `tex` must provide the $t operand.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst,
                                      NVPTXRegClass outtype,
                                      NVPTXRegClass intype,
                                      dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}], $lod;",
      []>;
3428
// Emits the two $t operand forms of TEX_UNIFIED_1D_ARRAY_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst,
                                      NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3436
// Unified-mode tex.level.a1d fetches with f32 coordinates, one per result
// type.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3446
// Base record for the TEX_UNIFIED_1D_ARRAY_GRAD instructions: coordinate list
// {$l, $x} ($l is always Int32Regs) followed by one-element {$gradx} and
// {$grady} lists. `tex` is prepended to the input operands and must provide
// the $t operand named in the asm string.
//
// The asm string uses a single space before "[$t", matching the sibling
// TEX_UNIFIED_* records.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst,
                                     NVPTXRegClass outtype,
                                     NVPTXRegClass intype,
                                     dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
3456
// Emits the two $t operand forms of TEX_UNIFIED_1D_ARRAY_GRAD_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst,
                                     NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3464
// Unified-mode tex.grad.a1d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3474
// Base record for the TEX_UNIFIED_2D instructions: four results from a
// two-component coordinate {$x, $y}. `tex` is prepended to the input operands
// and must provide the $t operand named in the asm string.
class TEX_UNIFIED_2D_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y\\}];",
      []>;
3482
// Emits the two $t operand forms of TEX_UNIFIED_2D_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3488
// Unified-mode tex.2d fetches. Record names read
// <result type>_<coordinate type>; s32 and u32 both use Int32Regs.
defm TEX_UNIFIED_2D_F32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3501
// Base record for the TEX_UNIFIED_2D_LEVEL instructions: coordinate {$x, $y}
// followed by a trailing $lod operand. `tex` must provide $t.
class TEX_UNIFIED_2D_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y\\}], $lod;",
      []>;
3509
// Emits the two $t operand forms of TEX_UNIFIED_2D_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D_LEVEL<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3515
// Unified-mode tex.level.2d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_2D_F32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3522
// Base record for the TEX_UNIFIED_2D_GRAD instructions: coordinate {$x, $y}
// followed by two-element {$gradx0, $gradx1} and {$grady0, $grady1} lists.
// `tex` must provide the $t operand.
class TEX_UNIFIED_2D_GRAD_base<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype,
                               dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y\\}],"
             " \\{$gradx0, $gradx1\\},"
             " \\{$grady0, $grady1\\};",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_2D_GRAD_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D_GRAD<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3538
// Unified-mode tex.grad.2d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_2D_F32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3545
// Base record for the TEX_UNIFIED_2D_ARRAY instructions. The coordinate list
// prints {$l, $x, $y, $y}: $l is always Int32Regs, and $y is repeated so the
// list has four entries. `tex` must provide the $t operand.
class TEX_UNIFIED_2D_ARRAY_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $y\\}];",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_2D_ARRAY_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D_ARRAY<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3558
// Unified-mode tex.a2d fetches. Record names read
// <result type>_<coordinate type>; s32 and u32 both use Int32Regs.
defm TEX_UNIFIED_2D_ARRAY_F32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3571
// Base record for the TEX_UNIFIED_2D_ARRAY_LEVEL instructions. The coordinate
// list prints {$l, $x, $y, $y} ($l is always Int32Regs; $y is repeated so the
// list has four entries) and is followed by a trailing $lod operand. `tex` is
// prepended to the input operands and must provide the $t operand.
//
// The asm string uses a single space before "[$t", matching the sibling
// TEX_UNIFIED_* records.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst,
                                      NVPTXRegClass outtype,
                                      NVPTXRegClass intype,
                                      dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_2D_ARRAY_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst,
                                      NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3588
// Unified-mode tex.level.a2d fetches with f32 coordinates, one per result
// type.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3598
// Base record for the TEX_UNIFIED_2D_ARRAY_GRAD instructions. The coordinate
// list prints {$l, $x, $y, $y} ($l is always Int32Regs; $y is repeated) and is
// followed by two-element gradient lists. `tex` must provide $t.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst,
                                     NVPTXRegClass outtype,
                                     NVPTXRegClass intype,
                                     dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins Int32Regs:$l, intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $y\\}],"
             " \\{$gradx0, $gradx1\\},"
             " \\{$grady0, $grady1\\};",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_2D_ARRAY_GRAD_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst,
                                     NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3616
// Unified-mode tex.grad.a2d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3626
// Base record for the TEX_UNIFIED_3D instructions. $z is printed twice so the
// coordinate list has four entries. `tex` is prepended to the input operands
// and must provide the $t operand named in the asm string.
class TEX_UNIFIED_3D_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}];",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_3D_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_3D<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3639
// Unified-mode tex.3d fetches. Record names read
// <result type>_<coordinate type>; s32 and u32 both use Int32Regs.
defm TEX_UNIFIED_3D_F32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3652
// Base record for the TEX_UNIFIED_3D_LEVEL instructions: coordinate list
// {$x, $y, $z, $z} ($z repeated) followed by a trailing $lod operand.
// `tex` must provide the $t operand.
class TEX_UNIFIED_3D_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, "
             "\\{$x, $y, $z, $z\\}], $lod;",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_3D_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_3D_LEVEL<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3666
// Unified-mode tex.level.3d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_3D_F32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3673
// Base record for the TEX_UNIFIED_3D_GRAD instructions. The coordinate list
// and both gradient lists repeat their third component so each prints four
// entries. `tex` must provide the $t operand.
class TEX_UNIFIED_3D_GRAD_base<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype,
                               dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_3D_GRAD_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_3D_GRAD<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3691
// Unified-mode tex.grad.3d fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_3D_F32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3698
// Base record for the TEX_UNIFIED_CUBE instructions. $z is printed twice so
// the coordinate list has four entries. `tex` is prepended to the input
// operands and must provide the $t operand named in the asm string.
class TEX_UNIFIED_CUBE_base<string inst,
                            NVPTXRegClass outtype,
                            NVPTXRegClass intype,
                            dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}];",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_CUBE_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_CUBE<string inst,
                            NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3711
// Unified-mode tex.cube fetches with f32 coordinates, one per result type.
defm TEX_UNIFIED_CUBE_F32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3718
// Base record for the TEX_UNIFIED_CUBE_LEVEL instructions: coordinate list
// {$x, $y, $z, $z} ($z repeated) followed by a trailing $lod operand.
// `tex` must provide the $t operand.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst,
                                  NVPTXRegClass outtype,
                                  NVPTXRegClass intype,
                                  dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex,
           (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, "
             "\\{$x, $y, $z, $z\\}], $lod;",
      []>;
// Emits the two $t operand forms of TEX_UNIFIED_CUBE_LEVEL_base:
// _R takes Int64Regs:$t, _I takes i64imm:$t.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst,
                                  NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3734
// Unified-mode tex.level.cube fetches with f32 coordinates, one per result
// type.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3744
// Common shape of a cube-array fetch.
//   outs: four `outtype` results ($r, $g, $b, $a).
//   ins : the caller-supplied `tex` dag (it must define `$t`), then an
//         Int32Regs `$l` followed by $x, $y, $z of `intype`.
// `$l` is printed as the first element of the coordinate vector.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];"),
        []>;
// Emits the `_R` (Int64Regs `$t`) and `_I` (i64imm `$t`) forms of a
// TEX_UNIFIED_CUBE_ARRAY_base instruction.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  defvar tex_in_reg = (ins Int64Regs:$t);
  defvar tex_as_imm = (ins i64imm:$t);

  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, tex_in_reg>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, tex_as_imm>;
}
3759
// tex.acube.v4 with f32 coordinates, one instantiation per result element
// type (s32 and u32 both use Int32Regs for the results).
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3766
// Common shape of a cube-array fetch that takes an extra `$lod` operand.
//   outs: four `outtype` results ($r, $g, $b, $a).
//   ins : the caller-supplied `tex` dag (it must define `$t`), then an
//         Int32Regs `$l`, then $x, $y, $z and $lod of `intype`.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l,
                       intype:$x, intype:$y, intype:$z,
                       intype:$lod)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\},",
                   " [$t, \\{$l, $x, $y, $z\\}], $lod;"),
        []>;
// Emits the `_R` (Int64Regs `$t`) and `_I` (i64imm `$t`) forms of a
// TEX_UNIFIED_CUBE_ARRAY_LEVEL_base instruction.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  defvar tex_in_reg = (ins Int64Regs:$t);
  defvar tex_as_imm = (ins i64imm:$t);

  def _R
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, tex_in_reg>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, tex_as_imm>;
}
3783
// tex.level.acube.v4 with f32 coordinates, one instantiation per result
// element type (s32 and u32 both use Int32Regs for the results).
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                                 Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                                 Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                                 Int32Regs, Float32Regs>;
3793
// Common shape of a cube fetch that takes two gradient vectors.
//   outs: four `outtype` results ($r, $g, $b, $a).
//   ins : the caller-supplied `tex` dag (it must define `$t`), then
//         $x, $y, $z, $gradx0..$gradx2 and $grady0..$grady2, all `intype`.
// Each of the three vectors in the asm string is padded to four elements by
// printing its last component twice ($z, $gradx2, $grady2).
class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z,
                       intype:$gradx0, intype:$gradx1, intype:$gradx2,
                       intype:$grady0, intype:$grady1, intype:$grady2)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],",
                   " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                   " \\{$grady0, $grady1, $grady2, $grady2\\};"),
        []>;
3806
// Emits the `_R` (Int64Regs `$t`) and `_I` (i64imm `$t`) forms of a
// TEX_UNIFIED_CUBE_GRAD_base instruction.
multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
  defvar tex_in_reg = (ins Int64Regs:$t);
  defvar tex_as_imm = (ins i64imm:$t);

  def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, tex_in_reg>;
  def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, tex_as_imm>;
}
3812
// tex.grad.cube.v4 with f32 coordinates, one instantiation per result
// element type (s32 and u32 both use Int32Regs for the results).
defm TEX_UNIFIED_CUBE_F32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3819
// Common shape of a cube-array fetch that takes two gradient vectors.
//   outs: four `outtype` results ($r, $g, $b, $a).
//   ins : the caller-supplied `tex` dag (it must define `$t`), then an
//         Int32Regs `$l`, then $x, $y, $z, $gradx0..$gradx2 and
//         $grady0..$grady2, all of `intype`.
// The coordinate vector is printed as {$l, $x, $y, $z}; each gradient vector
// is padded to four elements by printing its last component twice.
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l,
                       intype:$x, intype:$y, intype:$z,
                       intype:$gradx0, intype:$gradx1, intype:$gradx2,
                       intype:$grady0, intype:$grady1, intype:$grady2)),
        !strconcat(inst,
                   " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],",
                   " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},",
                   " \\{$grady0, $grady1, $grady2, $grady2\\};"),
        []>;
// Emits the `_R` (Int64Regs `$t`) and `_I` (i64imm `$t`) forms of a
// TEX_UNIFIED_CUBE_ARRAY_GRAD_base instruction.
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
  defvar tex_in_reg = (ins Int64Regs:$t);
  defvar tex_as_imm = (ins i64imm:$t);

  def _R
    : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype, tex_in_reg>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype, tex_as_imm>;
}
3839
// tex.grad.acube.v4 with f32 coordinates, one instantiation per result
// element type (s32 and u32 both use Int32Regs for the results).
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
                                Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
                                Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
                                Int32Regs, Float32Regs>;
3849
// Common shape of a 2D tld4 instruction.
//   outs: four `outtype` results ($v0..$v3).
//   ins : the caller-supplied `tex` dag (it must define `$t`), then the two
//         `intype` coordinates $x and $y.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
        !con(tex, (ins intype:$x, intype:$y)),
        !strconcat(inst,
                   " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];"),
        []>;
// Emits the `_R` (Int64Regs `$t`) and `_I` (i64imm `$t`) forms of a
// TLD4_UNIFIED_2D_base instruction.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  defvar tex_in_reg = (ins Int64Regs:$t);
  defvar tex_as_imm = (ins i64imm:$t);

  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, tex_in_reg>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, tex_as_imm>;
}
3862
// tld4.{r,g,b,a}.2d.v4 with f32 coordinates.  The R/G/B/A part of each record
// name matches the component selector in the mnemonic.

// f32 results.
defm TLD4_UNIFIED_R_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results (held in Int32Regs).
defm TLD4_UNIFIED_R_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results (also held in Int32Regs).
defm TLD4_UNIFIED_R_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3889
3890}
3891
3892
3893
3894//=== Surface load instructions
3895
// Surface loads that produce a single result register ($r).
//
// Every geometry follows the same recipe: a `*_base` class fixes the operand
// list and asm string, a multiclass emits the `_R` (surface operand `$s` in an
// Int64Regs register) and `_I` (`$s` as an i64imm) forms, and the defm lines
// instantiate b8/b16/b32/b64 for each of the .clamp/.trap/.zero mnemonic
// suffixes.  b8 results are held in Int16Regs.
//
// NOTE(review): IsSuld is `true` for this group, 2 for the two-result (.v2)
// group and 3 for the four-result (.v4) group; the flag's consumer is not
// visible here, so confirm its exact meaning against the NVPTXInst definition.
let IsSuld = true in {

// 1D: one Int32Regs coordinate ($x).
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
        []>;
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// 1D array: Int32Regs `$l` printed ahead of the $x coordinate.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
        []>;
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_ARRAY_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_ARRAY_I8_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// 2D: two Int32Regs coordinates ($x, $y).
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
        []>;
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// 2D array: `$l` plus ($x, $y); $y is printed twice so the coordinate vector
// has four elements.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
        []>;
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_ARRAY_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// 3D: ($x, $y, $z); $z is printed twice so the coordinate vector has four
// elements.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_3D_base<inst, outtype, surf_in_reg>;
  def _I : SULD_3D_base<inst, outtype, surf_as_imm>;
}

defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
4035
// Surface loads that produce two result registers ($r, $g), i.e. the `.v2`
// mnemonics.
//
// Every geometry follows the same recipe: a `*_base` class fixes the operand
// list and asm string, a multiclass emits the `_R` (surface operand `$s` in an
// Int64Regs register) and `_I` (`$s` as an i64imm) forms, and the defm lines
// instantiate b8/b16/b32/b64 for each of the .clamp/.trap/.zero mnemonic
// suffixes.  b8 results are held in Int16Regs.
//
// NOTE(review): IsSuld is 2 for this group, `true` for the single-result
// group and 3 for the four-result (.v4) group; the flag's consumer is not
// visible here, so confirm its exact meaning against the NVPTXInst definition.
let IsSuld = 2 in {

// 1D: one Int32Regs coordinate ($x).
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
        []>;
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_V2_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_V2_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// 1D array: Int32Regs `$l` printed ahead of the $x coordinate.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
        []>;
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

// 2D: two Int32Regs coordinates ($x, $y).
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
        []>;
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_V2_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_V2_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_V2I8_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;

// 2D array: `$l` plus ($x, $y); $y is printed twice so the coordinate vector
// has four elements.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];"),
        []>;
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// 3D: ($x, $y, $z); $z is printed twice so the coordinate vector has four
// elements.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_3D_V2_base<inst, outtype, surf_in_reg>;
  def _I : SULD_3D_V2_base<inst, outtype, surf_as_imm>;
}

defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

}
4200
// Surface loads that produce four result registers ($r, $g, $b, $a), i.e. the
// `.v4` mnemonics.
//
// Every geometry follows the same recipe: a `*_base` class fixes the operand
// list and asm string, a multiclass emits the `_R` (surface operand `$s` in an
// Int64Regs register) and `_I` (`$s` as an i64imm) forms, and the defm lines
// instantiate b8/b16/b32 for each of the .clamp/.trap/.zero mnemonic
// suffixes.  No b64 forms are instantiated in this group.  b8 results are
// held in Int16Regs.
//
// NOTE(review): IsSuld is 3 for this group, `true` for the single-result
// group and 2 for the two-result (.v2) group; the flag's consumer is not
// visible here, so confirm its exact meaning against the NVPTXInst definition.
let IsSuld = 3 in {

// 1D: one Int32Regs coordinate ($x).
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
        []>;
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_V4_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_V4_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// 1D array: Int32Regs `$l` printed ahead of the $x coordinate.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
        []>;
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, surf_in_reg>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, surf_as_imm>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// 2D: two Int32Regs coordinates ($x, $y).
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
        []>;
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_V4_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_V4_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// 2D array: `$l` plus ($x, $y); $y is printed twice so the coordinate vector
// has four elements.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
        []>;
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, surf_in_reg>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, surf_as_imm>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// 3D: ($x, $y, $z); $z is printed twice so the coordinate vector has four
// elements.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  defvar surf_in_reg = (ins Int64Regs:$s);
  defvar surf_as_imm = (ins i64imm:$s);
  def _R : SULD_3D_V4_base<inst, outtype, surf_in_reg>;
  def _I : SULD_3D_V4_base<inst, outtype, surf_as_imm>;
}

defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

}
4332
4333//-----------------------------------
4334// Texture Query Intrinsics
4335//-----------------------------------
4336
// txq.* query instructions.  Each query is defined twice: `_R` takes the `$a`
// operand in an Int64Regs register, `_I` takes it as an i64imm.  All of them
// write a single Int32Regs result `$d` and carry an empty pattern list.
// Every record defined inside this scope gets IsSurfTexQuery = true.
let IsSurfTexQuery = true in {
// txq.channel_order
def TXQ_CHANNEL_ORDER_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_order.b32 \t$d, [$a];",
              []>;
def TXQ_CHANNEL_ORDER_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_order.b32 \t$d, [$a];",
              []>;
// txq.channel_data_type
def TXQ_CHANNEL_DATA_TYPE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_data_type.b32 \t$d, [$a];",
              []>;
def TXQ_CHANNEL_DATA_TYPE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_data_type.b32 \t$d, [$a];",
              []>;
// txq.width
def TXQ_WIDTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.width.b32 \t$d, [$a];",
              []>;
def TXQ_WIDTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.width.b32 \t$d, [$a];",
              []>;
// txq.height
def TXQ_HEIGHT_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.height.b32 \t$d, [$a];",
              []>;
def TXQ_HEIGHT_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.height.b32 \t$d, [$a];",
              []>;
// txq.depth
def TXQ_DEPTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.depth.b32 \t$d, [$a];",
              []>;
def TXQ_DEPTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.depth.b32 \t$d, [$a];",
              []>;
// txq.array_size
def TXQ_ARRAY_SIZE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.array_size.b32 \t$d, [$a];",
              []>;
def TXQ_ARRAY_SIZE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.array_size.b32 \t$d, [$a];",
              []>;
// txq.num_samples
def TXQ_NUM_SAMPLES_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.num_samples.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_SAMPLES_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.num_samples.b32 \t$d, [$a];",
              []>;
// txq.num_mipmap_levels
def TXQ_NUM_MIPMAP_LEVELS_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.num_mipmap_levels.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_MIPMAP_LEVELS_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.num_mipmap_levels.b32 \t$d, [$a];",
              []>;
}
4403
// Selection pattern for a texture-query intrinsic: the 64-bit handle $a is
// forwarded unchanged to the register-handle (_R) form of the instruction.
class TXQ_PAT<Intrinsic intr, NVPTXInst inst>
    : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

def : TXQ_PAT<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER_R>;
def : TXQ_PAT<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE_R>;
def : TXQ_PAT<int_nvvm_txq_width,             TXQ_WIDTH_R>;
def : TXQ_PAT<int_nvvm_txq_height,            TXQ_HEIGHT_R>;
def : TXQ_PAT<int_nvvm_txq_depth,             TXQ_DEPTH_R>;
def : TXQ_PAT<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE_R>;
def : TXQ_PAT<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES_R>;
def : TXQ_PAT<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS_R>;
4420
4421
4422//-----------------------------------
4423// Surface Query Intrinsics
4424//-----------------------------------
4425
let IsSurfTexQuery = true in {

// Common shape of the surface-query instructions. Each one prints
// "suq.<query>.b32 \t$d, [$a];" and writes its result to the 32-bit register
// $d. `surfref` is the input operand list that supplies the handle $a.
class SUQ_QUERY_base<string query, dag surfref>
    : NVPTXInst<(outs Int32Regs:$d), surfref,
                "suq." # query # ".b32 \t$d, [$a];",
                []>;

// Produces both handle forms of a single query:
//   <NAME>_R - handle $a held in a 64-bit register
//   <NAME>_I - handle $a given as a 64-bit immediate
multiclass SUQ_QUERY<string query> {
  def _R : SUQ_QUERY_base<query, (ins Int64Regs:$a)>;
  def _I : SUQ_QUERY_base<query, (ins i64imm:$a)>;
}

defm SUQ_CHANNEL_ORDER     : SUQ_QUERY<"channel_order">;
defm SUQ_CHANNEL_DATA_TYPE : SUQ_QUERY<"channel_data_type">;
defm SUQ_WIDTH             : SUQ_QUERY<"width">;
defm SUQ_HEIGHT            : SUQ_QUERY<"height">;
defm SUQ_DEPTH             : SUQ_QUERY<"depth">;
defm SUQ_ARRAY_SIZE        : SUQ_QUERY<"array_size">;

}
4476
// Selection pattern for a surface-query intrinsic: the 64-bit handle $a is
// forwarded unchanged to the register-handle (_R) form of the instruction.
class SUQ_PAT<Intrinsic intr, NVPTXInst inst>
    : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

def : SUQ_PAT<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER_R>;
def : SUQ_PAT<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE_R>;
def : SUQ_PAT<int_nvvm_suq_width,             SUQ_WIDTH_R>;
def : SUQ_PAT<int_nvvm_suq_height,            SUQ_HEIGHT_R>;
def : SUQ_PAT<int_nvvm_suq_depth,             SUQ_DEPTH_R>;
def : SUQ_PAT<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE_R>;
4489
4490
4491//===- Handle Query -------------------------------------------------------===//
4492
4493// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicate. Prints "istypep.<reftype> \t$d, $a;", producing a
// 1-bit result in $d from the 64-bit handle $a, and is selected directly for
// the intrinsic `intr` through the embedded pattern.
class ISTYPEP_base<string reftype, Intrinsic intr>
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep." # reftype # " \t$d, $a;",
                [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER : ISTYPEP_base<"samplerref", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE : ISTYPEP_base<"surfref",    int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE : ISTYPEP_base<"texref",     int_nvvm_istypep_texture>;
4506
4507//===- Surface Stores -----------------------------------------------------===//
4508
4509let IsSust = true in {
4510
// 1D surface store of a single element.
//   inst   - complete mnemonic, e.g. "sust.b.1d.b8.clamp"
//   intype - register class of the stored value $r
//   surf   - input dag supplying the surface handle $s; it is concatenated
//            in front of the coordinate ($x) and data operands
// No selection pattern is attached here (the pattern list is empty).
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (b8 and b16 values both use Int16Regs)
defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_B8_ZERO  : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4539
// 1D surface store of a two-element vector ($r, $g).
//   inst   - complete mnemonic, e.g. "sust.b.1d.v2.b8.clamp"
//   intype - register class shared by both stored values
//   surf   - input dag supplying the surface handle $s
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_1D_V2B8_CLAMP  : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_V2B8_TRAP  : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_V2B8_ZERO  : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_1D_V2B8_TRAP  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4568
// 1D surface store of a four-element vector ($r, $g, $b, $a).
//   inst   - complete mnemonic, e.g. "sust.b.1d.v4.b8.clamp"
//   intype - register class shared by all four stored values
//   surf   - input dag supplying the surface handle $s
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(inst,
                           " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (no b64 variant is defined for v4)
defm SUST_B_1D_V4B8_CLAMP  : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_1D_V4B8_TRAP  : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_1D_V4B8_ZERO  : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;

// sust.p, .trap only
defm SUST_P_1D_V4B8_TRAP  : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4595
// 1D-array surface store of a single element. The coordinate vector is
// printed as {$idx, $x}.
//   inst   - complete mnemonic, e.g. "sust.b.a1d.b8.clamp"
//   intype - register class of the stored value $r
//   surf   - input dag supplying the surface handle $s
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_1D_ARRAY_B8_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_ARRAY_B8_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4639
// 1D-array surface store of a two-element vector ($r, $g). The coordinate
// vector is printed as {$idx, $x}.
//   inst   - complete mnemonic, e.g. "sust.b.a1d.v2.b8.clamp"
//   intype - register class shared by both stored values
//   surf   - input dag supplying the surface handle $s
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x,
                          intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_1D_ARRAY_V2B8_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_ARRAY_V2B8_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4684
// 1D-array surface store of a four-element vector ($r, $g, $b, $a). The
// coordinate vector is printed as {$idx, $x}.
//   inst   - complete mnemonic, e.g. "sust.b.a1d.v4.b8.clamp"
//   intype - register class shared by all four stored values
//   surf   - input dag supplying the surface handle $s
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (no b64 variant is defined for v4)
defm SUST_B_1D_ARRAY_V4B8_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_1D_ARRAY_V4B8_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

// sust.p, .trap only
defm SUST_P_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4723
// 2D surface store of a single element; coordinates are printed as {$x, $y}.
//   inst   - complete mnemonic, e.g. "sust.b.2d.b8.clamp"
//   intype - register class of the stored value $r
//   surf   - input dag supplying the surface handle $s
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_2D_B8_CLAMP  : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_2D_B8_TRAP  : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_2D_B8_ZERO  : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_2D_B8_TRAP  : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4752
// 2D surface store of a two-element vector ($r, $g); coordinates are printed
// as {$x, $y}.
//   inst   - complete mnemonic, e.g. "sust.b.2d.v2.b8.clamp"
//   intype - register class shared by both stored values
//   surf   - input dag supplying the surface handle $s
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_2D_V2B8_CLAMP  : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_2D_V2B8_TRAP  : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_2D_V2B8_ZERO  : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_2D_V2B8_TRAP  : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4782
// 2D surface store of a four-element vector ($r, $g, $b, $a); coordinates
// are printed as {$x, $y}.
//   inst   - complete mnemonic, e.g. "sust.b.2d.v4.b8.clamp"
//   intype - register class shared by all four stored values
//   surf   - input dag supplying the surface handle $s
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (no b64 variant is defined for v4)
defm SUST_B_2D_V4B8_CLAMP  : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_2D_V4B8_TRAP  : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_2D_V4B8_ZERO  : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

// sust.p, .trap only
defm SUST_P_2D_V4B8_TRAP  : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4809
// 2D-array surface store of a single element.
//   inst   - complete mnemonic, e.g. "sust.b.a2d.b8.clamp"
//   intype - register class of the stored value $r
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$idx, $x, $y, $y}:
// $y appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $y pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_2D_ARRAY_B8_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_2D_ARRAY_B8_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4854
// 2D-array surface store of a two-element vector ($r, $g).
//   inst   - complete mnemonic, e.g. "sust.b.a2d.v2.b8.clamp"
//   intype - register class shared by both stored values
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$idx, $x, $y, $y}:
// $y appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $y pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_2D_ARRAY_V2B8_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_2D_ARRAY_V2B8_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4899
// 2D-array surface store of a four-element vector ($r, $g, $b, $a).
//   inst   - complete mnemonic, e.g. "sust.b.a2d.v4.b8.clamp"
//   intype - register class shared by all four stored values
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$idx, $x, $y, $y}:
// $y appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $y pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (no b64 variant is defined for v4)
defm SUST_B_2D_ARRAY_V4B8_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_2D_ARRAY_V4B8_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

// sust.p, .trap only
defm SUST_P_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4938
// 3D surface store of a single element.
//   inst   - complete mnemonic, e.g. "sust.b.3d.b8.clamp"
//   intype - register class of the stored value $r
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$x, $y, $z, $z}:
// $z appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $z pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_3D_B8_CLAMP  : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_3D_B8_TRAP  : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_3D_B8_ZERO  : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_3D_B8_TRAP  : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4968
// 3D surface store of a two-element vector ($r, $g).
//   inst   - complete mnemonic, e.g. "sust.b.3d.v2.b8.clamp"
//   intype - register class shared by both stored values
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$x, $y, $z, $z}:
// $z appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $z pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_3D_V2B8_CLAMP  : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_3D_V2B8_TRAP  : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_3D_V2B8_ZERO  : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only
defm SUST_P_3D_V2B8_TRAP  : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4998
// 3D surface store of a four-element vector ($r, $g, $b, $a).
//   inst   - complete mnemonic, e.g. "sust.b.3d.v4.b8.clamp"
//   intype - register class shared by all four stored values
//   surf   - input dag supplying the surface handle $s
// The coordinate vector is printed with four entries, {$x, $y, $z, $z}:
// $z appears twice although only three coordinate operands exist.
// NOTE(review): presumably the repeated $z pads the vector to the
// four-element form the instruction expects - confirm against the PTX ISA.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes the surface handle in a 64-bit register, _I as a 64-bit immediate.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (no b64 variant is defined for v4)
defm SUST_B_3D_V4B8_CLAMP  : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

// sust.b, .trap
defm SUST_B_3D_V4B8_TRAP  : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

// sust.b, .zero
defm SUST_B_3D_V4B8_ZERO  : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

// sust.p, .trap only
defm SUST_P_3D_V4B8_TRAP  : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5025
5026}
5027
5028// Surface store instruction patterns
5029// I'm not sure why we can't just include these in the instruction definitions,
5030// but TableGen complains of type errors :(
5031
5032// .clamp variant
// Selects a single-element 1D surface-store intrinsic to the register-handle
// (_R) instruction. Handle $s, coordinate $x and value $r are forwarded
// unchanged; `rc` is the register class of the stored value.
class SUST_1D_PAT<Intrinsic intr, NVPTXInst inst, NVPTXRegClass rc>
    : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r),
          (inst Int64Regs:$s, Int32Regs:$x, rc:$r)>;

def : SUST_1D_PAT<int_nvvm_sust_b_1d_i8_clamp,
                  SUST_B_1D_B8_CLAMP_R, Int16Regs>;

def : SUST_1D_PAT<int_nvvm_sust_b_1d_i16_clamp,
                  SUST_B_1D_B16_CLAMP_R, Int16Regs>;

def : SUST_1D_PAT<int_nvvm_sust_b_1d_i32_clamp,
                  SUST_B_1D_B32_CLAMP_R, Int32Regs>;

def : SUST_1D_PAT<int_nvvm_sust_b_1d_i64_clamp,
                  SUST_B_1D_B64_CLAMP_R, Int64Regs>;
5048
// Selects a two-element 1D surface-store intrinsic to the register-handle
// (_R) instruction. Handle $s, coordinate $x and values $r, $g are forwarded
// unchanged; `rc` is the register class of both stored values.
class SUST_1D_V2_PAT<Intrinsic intr, NVPTXInst inst, NVPTXRegClass rc>
    : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
          (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g)>;

def : SUST_1D_V2_PAT<int_nvvm_sust_b_1d_v2i8_clamp,
                     SUST_B_1D_V2B8_CLAMP_R, Int16Regs>;

def : SUST_1D_V2_PAT<int_nvvm_sust_b_1d_v2i16_clamp,
                     SUST_B_1D_V2B16_CLAMP_R, Int16Regs>;

def : SUST_1D_V2_PAT<int_nvvm_sust_b_1d_v2i32_clamp,
                     SUST_B_1D_V2B32_CLAMP_R, Int32Regs>;

def : SUST_1D_V2_PAT<int_nvvm_sust_b_1d_v2i64_clamp,
                     SUST_B_1D_V2B64_CLAMP_R, Int64Regs>;
5068
5069def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5070           Int64Regs:$s, Int32Regs:$x,
5071           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5072          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5073           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5074
5075def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5076           Int64Regs:$s, Int32Regs:$x,
5077           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5078          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5079           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5080
5081def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5082           Int64Regs:$s, Int32Regs:$x,
5083           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5084          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5085           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5086
5087
5088
// Selection patterns for the int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_1D_ARRAY_*_CLAMP_R instruction and forwards the operands unchanged
// and in the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then
// the stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinate $x -- confirm against the intrinsic definitions.
5089def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5090           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5091          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5092           Int16Regs:$r)>;
5093
5094def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5095           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5096          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5097           Int16Regs:$r)>;
5098
5099def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5100           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5101          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5102           Int32Regs:$r)>;
5103
5104def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5105           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5106          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5107           Int64Regs:$r)>;
5108
5109def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5110          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5111          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5112           Int16Regs:$r, Int16Regs:$g)>;
5113
5114def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5115          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5116          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5117           Int16Regs:$r, Int16Regs:$g)>;
5118
5119def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5120          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5121          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5122           Int32Regs:$r, Int32Regs:$g)>;
5123
5124def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5125          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5126          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5127           Int64Regs:$r, Int64Regs:$g)>;
5128
5129def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5130           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5131           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5132          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5133           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5134
5135def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5136           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5137           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5138          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5139           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5140
5141def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5142           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5143           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5144          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5145           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5146
5147
5148
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_2D_*_CLAMP_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x and $y (both Int32Regs), then the stored
// components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $x/$y are presumably the two surface coordinates -- confirm
// against the intrinsic definitions.
5149def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5150           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5151          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5152           Int16Regs:$r)>;
5153
5154def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5155           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5156          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5157           Int16Regs:$r)>;
5158
5159def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5160           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5161          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5162           Int32Regs:$r)>;
5163
5164def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5165           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5166          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5167           Int64Regs:$r)>;
5168
5169def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5170          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5171          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5172           Int16Regs:$r, Int16Regs:$g)>;
5173
5174def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5175          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5176          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5177           Int16Regs:$r, Int16Regs:$g)>;
5178
5179def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5180          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5181          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5182           Int32Regs:$r, Int32Regs:$g)>;
5183
5184def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5185          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5186          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5187           Int64Regs:$r, Int64Regs:$g)>;
5188
5189def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5190           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5191           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5192          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5193           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5194
5195def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5196           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5197           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5198          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5199           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5200
5201def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5202           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5203           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5204          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5205           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5206
5207
5208
// Selection patterns for the int_nvvm_sust_b_2d_array_*_clamp intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_2D_ARRAY_*_CLAMP_R instruction and forwards the operands unchanged
// and in the same order: $s (Int64Regs), $l, $x and $y (all Int32Regs), then
// the stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates within that layer -- confirm against the intrinsic definitions.
5209def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5210          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5211          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5212           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5213           Int16Regs:$r)>;
5214
5215def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5216          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5218           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5219           Int16Regs:$r)>;
5220
5221def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5222          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5223          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5224           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5225           Int32Regs:$r)>;
5226
5227def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5228          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5229          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5230           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5231           Int64Regs:$r)>;
5232
5233def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5234           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5235           Int16Regs:$r, Int16Regs:$g),
5236          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5237           Int32Regs:$x, Int32Regs:$y,
5238           Int16Regs:$r, Int16Regs:$g)>;
5239
5240def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5241           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5242           Int16Regs:$r, Int16Regs:$g),
5243          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5244           Int32Regs:$x, Int32Regs:$y,
5245           Int16Regs:$r, Int16Regs:$g)>;
5246
5247def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5248           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5249           Int32Regs:$g),
5250          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5251           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5252
5253def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5254           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5255           Int64Regs:$g),
5256          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5257           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5258
5259def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5260           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5261           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5262          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5263           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5264           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5265
5266def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5267           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5268           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5269          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5270           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5271           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5272
5273def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5274           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5275           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5276          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5277           Int32Regs:$x, Int32Regs:$y,
5278           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5279
5280
5281
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_3D_*_CLAMP_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x, $y and $z (all Int32Regs), then the
// stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $x/$y/$z are presumably the three surface coordinates --
// confirm against the intrinsic definitions.
5282def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5283           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5284           Int16Regs:$r),
5285          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5286           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5287           Int16Regs:$r)>;
5288
5289def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5290           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5291           Int16Regs:$r),
5292          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5293           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5294           Int16Regs:$r)>;
5295
5296def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5297           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5298           Int32Regs:$r),
5299          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5300           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5301           Int32Regs:$r)>;
5302
5303def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5304           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5305           Int64Regs:$r),
5306          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5307           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5308           Int64Regs:$r)>;
5309
5310def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5311           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5312           Int16Regs:$r, Int16Regs:$g),
5313          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5314           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5315           Int16Regs:$r, Int16Regs:$g)>;
5316
5317def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5318           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5319           Int16Regs:$r, Int16Regs:$g),
5320          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5321           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5322           Int16Regs:$r, Int16Regs:$g)>;
5323
5324def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5325           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5326           Int32Regs:$r, Int32Regs:$g),
5327          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5328           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5329           Int32Regs:$r, Int32Regs:$g)>;
5330
5331def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5332           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5333           Int64Regs:$r, Int64Regs:$g),
5334          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5335           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5336           Int64Regs:$r, Int64Regs:$g)>;
5337
5338def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5339           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5340           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5341          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5342           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5343           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5344
5345def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5346           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5347           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5348          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5349           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5350           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5351
5352def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5353           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5354           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5355          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5356           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5357           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5358
5359
5360// .trap variant
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_1D_*_TRAP_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then the stored components
// $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate, and
// the _R suffix presumably names the register-handle form of the instruction --
// confirm against the SUST_* instruction definitions.
5361def : Pat<(int_nvvm_sust_b_1d_i8_trap
5362           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5363          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5364
5365def : Pat<(int_nvvm_sust_b_1d_i16_trap
5366           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5368
5369def : Pat<(int_nvvm_sust_b_1d_i32_trap
5370           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5371          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5372
5373def : Pat<(int_nvvm_sust_b_1d_i64_trap
5374           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5375          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5376
5377def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5378           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5379          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5380           Int16Regs:$r, Int16Regs:$g)>;
5381
5382def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5383           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5384          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5385           Int16Regs:$r, Int16Regs:$g)>;
5386
5387def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5388           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5389          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5390           Int32Regs:$r, Int32Regs:$g)>;
5391
5392def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5393           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5394          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5395           Int64Regs:$r, Int64Regs:$g)>;
5396
5397def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5398           Int64Regs:$s, Int32Regs:$x,
5399           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5400          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5401           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5402
5403def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5404           Int64Regs:$s, Int32Regs:$x,
5405           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5406          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5407           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5408
5409def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5410           Int64Regs:$s, Int32Regs:$x,
5411           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5412          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5413           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5414
5415
5416
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_1D_ARRAY_*_TRAP_R instruction and forwards the operands unchanged
// and in the same order: $s (Int64Regs), $l (Int32Regs), $x (Int32Regs), then
// the stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $l is presumably the array layer index, placed before the
// coordinate $x -- confirm against the intrinsic definitions.
5417def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5418           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5419          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5420           Int16Regs:$r)>;
5421
5422def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5423           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5424          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5425           Int16Regs:$r)>;
5426
5427def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5428           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5429          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5430           Int32Regs:$r)>;
5431
5432def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5433           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5434          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5435           Int64Regs:$r)>;
5436
5437def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5438          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5439          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5440           Int16Regs:$r, Int16Regs:$g)>;
5441
5442def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5443          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5444          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5445           Int16Regs:$r, Int16Regs:$g)>;
5446
5447def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5448          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5449          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5450           Int32Regs:$r, Int32Regs:$g)>;
5451
5452def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5453          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5454          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5455           Int64Regs:$r, Int64Regs:$g)>;
5456
5457def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5458           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5459           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5460          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5461           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5462
5463def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5464           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5465           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5466          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5467           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5468
5469def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5470           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5471           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5472          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5473           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5474
5475
5476
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_2D_*_TRAP_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x and $y (both Int32Regs), then the stored
// components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $x/$y are presumably the two surface coordinates -- confirm
// against the intrinsic definitions.
5477def : Pat<(int_nvvm_sust_b_2d_i8_trap
5478           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5479          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5480           Int16Regs:$r)>;
5481
5482def : Pat<(int_nvvm_sust_b_2d_i16_trap
5483           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5484          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5485           Int16Regs:$r)>;
5486
5487def : Pat<(int_nvvm_sust_b_2d_i32_trap
5488           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5489          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5490           Int32Regs:$r)>;
5491
5492def : Pat<(int_nvvm_sust_b_2d_i64_trap
5493           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5494          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5495           Int64Regs:$r)>;
5496
5497def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5498          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5499          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5500           Int16Regs:$r, Int16Regs:$g)>;
5501
5502def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5503          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5504          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5505           Int16Regs:$r, Int16Regs:$g)>;
5506
5507def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5508          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5509          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5510           Int32Regs:$r, Int32Regs:$g)>;
5511
5512def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5513          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5514          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5515           Int64Regs:$r, Int64Regs:$g)>;
5516
5517def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5518           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5519           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5520          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5521           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5522
5523def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5524           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5525           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5526          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5527           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5528
5529def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5530           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5531           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5532          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5533           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5534
5535
5536
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_2D_ARRAY_*_TRAP_R instruction and forwards the operands unchanged
// and in the same order: $s (Int64Regs), $l, $x and $y (all Int32Regs), then
// the stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates within that layer -- confirm against the intrinsic definitions.
5537def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5538          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5539          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5540           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5541           Int16Regs:$r)>;
5542
5543def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5544          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5546           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5547           Int16Regs:$r)>;
5548
5549def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5550          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5551          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5552           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5553           Int32Regs:$r)>;
5554
5555def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5556          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5557          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5558           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5559           Int64Regs:$r)>;
5560
5561def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5562           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5563           Int16Regs:$r, Int16Regs:$g),
5564          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5565           Int32Regs:$x, Int32Regs:$y,
5566           Int16Regs:$r, Int16Regs:$g)>;
5567
5568def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5569           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5570           Int16Regs:$r, Int16Regs:$g),
5571          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5572           Int32Regs:$x, Int32Regs:$y,
5573           Int16Regs:$r, Int16Regs:$g)>;
5574
5575def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5576           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5577           Int32Regs:$g),
5578          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5579           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5580
5581def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5582           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5583           Int64Regs:$g),
5584          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5585           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5586
5587def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5588           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5589           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5590          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5591           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5592           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5593
5594def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5595           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5596           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5597          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5598           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5599           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5600
5601def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5602           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5603           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5604          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5605           Int32Regs:$x, Int32Regs:$y,
5606           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5607
5608
5609
// Selection patterns for the int_nvvm_sust_b_3d_*_trap intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_3D_*_TRAP_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x, $y and $z (all Int32Regs), then the
// stored components $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs.
// NOTE(review): $x/$y/$z are presumably the three surface coordinates --
// confirm against the intrinsic definitions.
5610def : Pat<(int_nvvm_sust_b_3d_i8_trap
5611           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5612           Int16Regs:$r),
5613          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5614           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5615           Int16Regs:$r)>;
5616
5617def : Pat<(int_nvvm_sust_b_3d_i16_trap
5618           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5619           Int16Regs:$r),
5620          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5621           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5622           Int16Regs:$r)>;
5623
5624def : Pat<(int_nvvm_sust_b_3d_i32_trap
5625           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5626           Int32Regs:$r),
5627          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5628           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5629           Int32Regs:$r)>;
5630
5631def : Pat<(int_nvvm_sust_b_3d_i64_trap
5632           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5633           Int64Regs:$r),
5634          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5635           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5636           Int64Regs:$r)>;
5637
5638def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5639           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5640           Int16Regs:$r, Int16Regs:$g),
5641          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5642           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5643           Int16Regs:$r, Int16Regs:$g)>;
5644
5645def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5646           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5647           Int16Regs:$r, Int16Regs:$g),
5648          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5649           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5650           Int16Regs:$r, Int16Regs:$g)>;
5651
5652def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5653           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5654           Int32Regs:$r, Int32Regs:$g),
5655          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5656           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5657           Int32Regs:$r, Int32Regs:$g)>;
5658
5659def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5660           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5661           Int64Regs:$r, Int64Regs:$g),
5662          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5663           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5664           Int64Regs:$r, Int64Regs:$g)>;
5665
5666def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5667           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5668           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5669          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5670           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5671           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5672
5673def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5674           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5675           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5676          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5677           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5678           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5679
5680def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5681           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5682           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5683          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5684           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5685           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5686
5687
5688// .zero variant
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics.
// Each anonymous Pat maps one intrinsic onto the correspondingly named
// SUST_B_1D_*_ZERO_R instruction and forwards the operands unchanged and in
// the same order: $s (Int64Regs), $x (Int32Regs), then the stored components
// $r [, $g [, $b, $a]].
// Covered element forms: i8, i16, i32, i64, v2i8, v2i16, v2i32, v2i64, v4i8,
// v4i16, v4i32 (there is no v4i64 pattern in this group).
// The i8 and i16 forms both pass their data in Int16Regs; i32 uses Int32Regs
// and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle and $x the coordinate, and
// the _R suffix presumably names the register-handle form of the instruction --
// confirm against the SUST_* instruction definitions.
5689def : Pat<(int_nvvm_sust_b_1d_i8_zero
5690           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5691          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5692
5693def : Pat<(int_nvvm_sust_b_1d_i16_zero
5694           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5695          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5696
5697def : Pat<(int_nvvm_sust_b_1d_i32_zero
5698           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5699          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5700
5701def : Pat<(int_nvvm_sust_b_1d_i64_zero
5702           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5703          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5704
5705def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5706           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5707          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5708           Int16Regs:$r, Int16Regs:$g)>;
5709
5710def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5711           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5712          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5713           Int16Regs:$r, Int16Regs:$g)>;
5714
5715def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5716           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5717          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5718           Int32Regs:$r, Int32Regs:$g)>;
5719
5720def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5721           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5722          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5723           Int64Regs:$r, Int64Regs:$g)>;
5724
5725def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5726           Int64Regs:$s, Int32Regs:$x,
5727           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5728          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5729           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5730
5731def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5732           Int64Regs:$s, Int32Regs:$x,
5733           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5734          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5735           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5736
5737def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5738           Int64Regs:$s, Int32Regs:$x,
5739           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5740          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5741           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5742
5743
5744
// Each int_nvvm_sust_b_1d_array_<ty>_zero intrinsic selects the
// SUST_B_1D_ARRAY_<TY>_ZERO_R instruction of the same element type, with all
// operands forwarded unchanged. 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32, i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_1D_ARRAY_B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32, v2i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_1D_ARRAY_V2B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32 (there is no v4i64 form).
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_1D_ARRAY_V4B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
5802
5803
5804
// Each int_nvvm_sust_b_2d_<ty>_zero intrinsic selects the SUST_B_2D_<TY>_ZERO_R
// instruction of the same element type, with all operands forwarded unchanged.
// 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32, i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_2d_i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32, v2i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_V2B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32 (there is no v4i64 form).
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_V4B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
5862
5863
5864
// Each int_nvvm_sust_b_2d_array_<ty>_zero intrinsic selects the
// SUST_B_2D_ARRAY_<TY>_ZERO_R instruction of the same element type, with all
// operands forwarded unchanged. 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32, i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_ARRAY_B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32, v2i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_ARRAY_V2B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32 (there is no v4i64 form).
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_2D_ARRAY_V4B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
5935
5936
5937
// Each int_nvvm_sust_b_3d_<ty>_zero intrinsic selects the SUST_B_3D_<TY>_ZERO_R
// instruction of the same element type, with all operands forwarded unchanged.
// 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32, i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_3d_i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_3D_B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32, v2i64.
foreach bits = ["8", "16", "32", "64"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs,
                    !eq(bits, "64") : Int64Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_3D_V2B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32 (there is no v4i64 form).
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # bits # "_zero");
  defvar inst = !cast<Instruction>("SUST_B_3D_V4B" # bits # "_ZERO_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6014
6015
6016
6017
// Each int_nvvm_sust_p_1d_<ty>_trap intrinsic selects the SUST_P_1D_<TY>_TRAP_R
// instruction of the same element type, with all operands forwarded unchanged.
// Only 8-, 16- and 32-bit elements are covered; 8-bit and 16-bit elements both
// use Int16Regs.

// Scalar stores: i8, i16, i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_1d_i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r),
            (inst Int64Regs:$s, Int32Regs:$x, rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_V2B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_V4B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6062
6063
6064
// Each int_nvvm_sust_p_1d_array_<ty>_trap intrinsic selects the
// SUST_P_1D_ARRAY_<TY>_TRAP_R instruction of the same element type, with all
// operands forwarded unchanged. Only 8-, 16- and 32-bit elements are covered;
// 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_ARRAY_B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_ARRAY_V2B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_1D_ARRAY_V4B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6112
6113
6114
// Each int_nvvm_sust_p_2d_<ty>_trap intrinsic selects the SUST_P_2D_<TY>_TRAP_R
// instruction of the same element type, with all operands forwarded unchanged.
// Only 8-, 16- and 32-bit elements are covered; 8-bit and 16-bit elements both
// use Int16Regs.

// Scalar stores: i8, i16, i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_2d_i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_2d_v2i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_V2B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_2d_v4i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_V4B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6162
6163
6164
// Each int_nvvm_sust_p_2d_array_<ty>_trap intrinsic selects the
// SUST_P_2D_ARRAY_<TY>_TRAP_R instruction of the same element type, with all
// operands forwarded unchanged. Only 8-, 16- and 32-bit elements are covered;
// 8-bit and 16-bit elements both use Int16Regs.

// Scalar stores: i8, i16, i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_2d_array_i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_ARRAY_B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_2d_array_v2i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_ARRAY_V2B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr =
      !cast<Intrinsic>("int_nvvm_sust_p_2d_array_v4i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_2D_ARRAY_V4B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6223
6224
6225
// Each int_nvvm_sust_p_3d_<ty>_trap intrinsic selects the SUST_P_3D_<TY>_TRAP_R
// instruction of the same element type, with all operands forwarded unchanged.
// Only 8-, 16- and 32-bit elements are covered; 8-bit and 16-bit elements both
// use Int16Regs.

// Scalar stores: i8, i16, i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_3d_i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_3D_B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r)>;
}

// Two-element vector stores: v2i8, v2i16, v2i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_3d_v2i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_3D_V2B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g)>;
}

// Four-element vector stores: v4i8, v4i16, v4i32.
foreach bits = ["8", "16", "32"] in {
  defvar rc = !cond(!eq(bits, "8")  : Int16Regs,
                    !eq(bits, "16") : Int16Regs,
                    !eq(bits, "32") : Int32Regs);
  defvar intr = !cast<Intrinsic>("int_nvvm_sust_p_3d_v4i" # bits # "_trap");
  defvar inst = !cast<Instruction>("SUST_P_3D_V4B" # bits # "_TRAP_R");
  def : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g, rc:$b, rc:$a),
            (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             rc:$r, rc:$g, rc:$b, rc:$a)>;
}
6288
6289//-----------------------------------
6290// Read Special Registers
6291//-----------------------------------
6292
// Instruction that copies the special register %<regname> into a 64-bit
// integer register with "mov.u64". It is selected for the operand-less
// intrinsic `intop`; `Preds` lists the predicates the instruction requires
// (none by default).
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>,
    Requires<Preds>;
6298
// Instruction that copies the special register %<regname> into a 32-bit
// integer register with "mov.u32". It is selected for the operand-less
// intrinsic `intop`; `Preds` lists the predicates the instruction requires
// (none by default).
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>,
    Requires<Preds>;
6304
// Defines four PTX_READ_SREG_R32 instructions, suffixed _x, _y, _z and _w, one
// per component of the special register `regname`. Component <c> reads
// "<regname>.<c>" and is matched by int_nvvm_read_ptx_sreg_<regname>_<c>.
// `Preds` is passed through to every component.
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
  foreach comp = ["x", "y", "z", "w"] in
    def "_" # comp
        : PTX_READ_SREG_R32<regname # "." # comp,
                            !cast<Intrinsic>("int_nvvm_read_ptx_sreg_"
                                             # regname # "_" # comp),
                            Preds>;
}
6312
// TODO: Add vector-read versions of the special registers.
6314
// Four-component (.x/.y/.z/.w) reads of the tid, ntid, ctaid and nctaid
// special registers. No extra predicates are required.
defm INT_PTX_SREG_TID    : PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID   : PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID  : PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID : PTX_READ_SREG_R32V4<"nctaid">;
6319
// Four-component reads of the cluster special registers. Every instruction in
// this group requires hasSM<90> and hasPTX<78>; the outer `let` sets the same
// Predicates value that passing the list as the Preds argument would.
let Predicates = [hasSM<90>, hasPTX<78>] in {
  defm INT_PTX_SREG_CLUSTERID      : PTX_READ_SREG_R32V4<"clusterid">;
  defm INT_PTX_SREG_NCLUSTERID     : PTX_READ_SREG_R32V4<"nclusterid">;
  defm INT_PTX_SREG_CLUSTER_CTAID  : PTX_READ_SREG_R32V4<"cluster_ctaid">;
  defm INT_PTX_SREG_CLUSTER_NCTAID : PTX_READ_SREG_R32V4<"cluster_nctaid">;
}
6328
// Scalar reads of cluster_ctarank and cluster_nctarank. Both require hasSM<90>
// and hasPTX<78>; the outer `let` sets the same Predicates value that passing
// the list as the Preds argument would.
let Predicates = [hasSM<90>, hasPTX<78>] in {
  def INT_PTX_SREG_CLUSTER_CTARANK
      : PTX_READ_SREG_R32<"cluster_ctarank",
                          int_nvvm_read_ptx_sreg_cluster_ctarank>;
  def INT_PTX_SREG_CLUSTER_NCTARANK
      : PTX_READ_SREG_R32<"cluster_nctarank",
                          int_nvvm_read_ptx_sreg_cluster_nctarank>;
}
6337
6338
// 32-bit reads of the laneid, warpid, nwarpid, smid, nsmid and gridid special
// registers, each matched by its int_nvvm_read_ptx_sreg_* intrinsic.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6351
// 32-bit reads of the lanemask_{eq,le,lt,ge,gt} special registers.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6362
// Reads of the clock special registers: "clock" into a 32-bit register and
// "clock64" into a 64-bit register.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6367
// 32-bit reads of the pm0..pm3 special registers. Iteration i defines
// INT_PTX_SREG_PM<i>, which reads "pm<i>" and is matched by
// int_nvvm_read_ptx_sreg_pm<i>.
foreach i = 0...3 in
  def "INT_PTX_SREG_PM" # i
      : PTX_READ_SREG_R32<"pm" # i,
                          !cast<Intrinsic>("int_nvvm_read_ptx_sreg_pm" # i)>;
6372
// Reads the warp size by moving the constant WARP_SZ into a 32-bit register.
// TODO: Ideally this would reuse the PTX_READ_SREG_* helpers, but they always
// emit a "%<regname>" operand and so cannot express the bare WARP_SZ constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6378
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record being extended; its geom, frag and ptx_elt_type
//        are forwarded to the WMMA_REGS base class.
//   op - name of the instruction family the fragment is used with. It is only
//        consulted when choosing Predicates (compared against "mma" and
//        "ldmatrix" below).
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  // There is deliberately no default arm: an element type that is not listed
  // here matches nothing in the !cond.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Int32Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one register of
  // `regclass` per entry of the inherited `regs` list.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // !cond selects the FIRST arm whose condition holds, so the order of the
  // arms below is significant: more specific (geom, type) arms must stay ahead
  // of the geometry-only arms that would otherwise also match.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],

    // f64 @ m8n8k4. Must precede the non-f64 m8n8k4 arm further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],

    // tf32 and f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],

    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],

    // NOTE(review): this arm matches on geometry alone, so it applies to every
    // element type that uses these shapes -- confirm that is intended.
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],

    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],

    // (A second, unreachable copy of the f64 @ m8n8k4 arm used to live here;
    // that case is already handled by the identical arm near the top.)

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);

  // template DAGs for instruction inputs/output: the same registers and names
  // wrapped in an (outs ...) or an (ins ...) operator respectively.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
6487
// Turns an instruction's (ins ...) argument dag into a dag that matches a call
// to the intrinsic `Intr`.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Walk the dag and rewrite each element: the `ins` operator becomes the
  // intrinsic, and the memory operand kinds (MEMri, MEMri64, imem) become
  // their address counterparts (ADDRri, ADDRri64, ADDRvar).
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6497
// PatFrag flavour of BuildPatternI: turns an instruction's (ins ...) argument
// dag into a dag that matches the pattern fragment `Intr`.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Walk the dag and rewrite each element: the `ins` operator becomes the
  // PatFrag, and the memory operand kinds (MEMri, MEMri64, imem) become
  // their address counterparts (ADDRri, ADDRri64, ADDRvar).
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6507
// Base class of every MMA-family instruction. It carries the fields shared by
// all of them when building selection patterns.
//   _Intr - name of the intrinsic record the instruction implements.
//   _Args - list of (ins ...) dags which together form the input operands.
// The operand lists and asm string are placeholders here; derived classes
// override them.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All pieces of _Args folded left-to-right into one (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
  // Pre-built pattern of the form (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
6517
//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
//   Frag       - fragment being loaded; supplies outputs, predicates and the
//                geom/frag/type parts of the mnemonic.
//   Layout     - layout component of the mnemonic.
//   Space      - address-space suffix: ".shared", ".global" or anything else
//                for generic.
//   WithStride - when set, the instruction takes an extra $ldm operand.
//   SrcOp      - operand kind used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // The load/store intrinsics are overloaded on the pointer's address space,
  // so an address-space-constrained PatFrag is needed to pick the right one.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...).
  dag PFOperands = !con((ops node:$src),
                        !if(WithStride, (ops node:$ldm), (ops)));
  dag PFOperandsIntr = !if(WithStride,
                           (Intr node:$src, node:$ldm),
                           (Intr node:$src));
  // PatFrag which matches only when the access is in the requested space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".global"): AS_match.global,
                                   !eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx is an extra trailing operand, printed via the ${ptx:aligned} modifier.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load." # Frag.frag # ".sync" # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
6557
//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
//   Frag       - fragment being stored; supplies the data inputs, predicates
//                and the geom/type parts of the mnemonic.
//   Layout     - layout component of the mnemonic.
//   Space      - address-space suffix: ".shared", ".global" or anything else
//                for generic.
//   WithStride - when set, the instruction takes an extra $ldm operand.
//   DstOp      - operand kind used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the pointer's address space,
  // so an address-space-constrained PatFrag is needed to pick the right one.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...):
  // the destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con(
      (ops node:$dst),
      !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
      !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag which matches only when the access is in the requested space.
  // Its body is PFOperands with the `ops` operator replaced by the intrinsic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".global"): AS_match.global,
                                   !eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store produces no register results.
  let OutOperandList = (outs);
  // $ptx is an extra trailing operand, printed via the ${ptx:aligned} modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.store.d.sync" # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}
6597
// Instantiate every supported load/store variant and collect the resulting
// records in MMA_LDSTs. Variants are the cross product of layout, stride,
// address space, address operand kind and fragment, filtered through
// NVVM_WMMA_LDST_SUPPORTED.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach layout = ["row", "col"] in
  foreach stride = [false, true] in
  foreach space = [".global", ".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads.
    foreach frag = NVVM_MMA_OPS.all_ld_ops in
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
        def : WMMA_LOAD<WMMA_REGINFO<frag, "load">,
                        layout, space, stride, addr>;
    // Stores.
    foreach frag = NVVM_MMA_OPS.all_st_ops in
      if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
        def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">,
                           layout, space, stride, addr>;
  } // layout, stride, space, addr
} // defset
6615
// Computes the predicate list of an MMA instruction. It starts from the
// predicates of fragment A and, for the ".and.popc" B1 operation only, appends
// the extra SM/PTX requirements of that variant.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!ne(b1op, ".and.popc"), [], [hasSM<80>, hasPTX<71>]));
}
// WMMA.MMA
//   FragA/FragB/FragC - input fragments; FragD - result fragment.
//   ALayout/BLayout   - layout components of the mnemonic.
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding-mode component, may be empty.
//   b1op              - B1 operation suffix, may be empty.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd,
               string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                         FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx is an extra trailing operand, printed via the ${ptx:aligned} modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the mnemonic. When A is f16 only the D and C types are
  // spelled out; otherwise all four fragment types appear in D, A, B, C order.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        "." # FragD.ptx_elt_type
                        # "." # FragC.ptx_elt_type,
                        "." # FragD.ptx_elt_type
                        # "." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type
                        # "." # FragC.ptx_elt_type);
  let AsmString = "wmma.mma" # b1op # ".sync" # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}
6659
// Instantiate every supported wmma.mma variant and collect the records in
// WMMAs. Each `op` is indexed as op[0..3], used for fragments A, B, C and D.
defset list<WMMA_INSTR> WMMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach rnd = ["", "rn", "rz", "rm", "rp"] in
  foreach op = NVVM_MMA_OPS.all_wmma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then
      def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                     WMMA_REGINFO<op[1], "wmma.mma">,
                     WMMA_REGINFO<op[2], "wmma.mma">,
                     WMMA_REGINFO<op[3], "wmma.mma">,
                     layout_a, layout_b, satf, rnd, b1op>;
} // defset
6681
// MMA
//   FragA/FragB/FragC - input fragments; FragD - result fragment.
//   ALayout/BLayout   - layout components of the mnemonic.
//   Satfinite         - non-zero appends ".satfinite".
//   b1op              - B1 operation suffix, may be empty.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                        FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx is appended as an extra trailing operand; this asm string does not
  // reference it.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the mnemonic: all four fragment types in D, A, B, C order.
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}
6709
// Instantiate every supported mma variant and collect the records in MMAs.
// Each `op` is indexed as op[0..3], used for fragments A, B, C and D.
defset list<WMMA_INSTR> MMAs = {
  foreach layout_a = ["row", "col"] in
  foreach layout_b = ["row", "col"] in
  foreach satf = [0, 1] in
  foreach op = NVVM_MMA_OPS.all_mma_ops in
  foreach b1op = NVVM_MMA_B1OPS<op>.ret in
    if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then
      def : MMA<WMMA_REGINFO<op[0], "mma">,
                WMMA_REGINFO<op[1], "mma">,
                WMMA_REGINFO<op[2], "mma">,
                WMMA_REGINFO<op[3], "mma">,
                layout_a, layout_b, satf, b1op>;
} // defset
6729
//
// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
//
//   Frag       - fragment being loaded; supplies outputs, predicates and the
//                geom/frag/type parts of the mnemonic.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - address-space suffix: ".shared", or anything else for generic.
//   SrcOp      - operand kind used for the $src address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag which matches only when the access is in the requested space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"),
                                 AS_match.shared,
                                 AS_match.generic)>;
  // Replace the unconstrained pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx is appended as an extra trailing operand; this asm string does not
  // reference it.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned." # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}
6754
// Instantiate every supported ldmatrix variant and collect the records in
// LDMATRIXs. Variants are the cross product of transposition, address space,
// address operand kind and fragment, filtered through NVVM_LDMATRIX_SUPPORTED.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach transposed = [false, true] in
  foreach space = [".shared", ""] in
  foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
  foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
    if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
      def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">,
                     transposed, space, addr>;
} // defset
6768
// Selection pattern mapping an MMA instruction's IntrinsicPattern onto the
// instruction itself, guarded by the instruction's predicates.
//
// Building non-flat DAGs is still awkward: a dag node cannot be !subst'ed with
// another dag. The result dag is therefore assembled in two steps -- first
// 'ins' is replaced with the instruction record across Args, and only then is
// ptx.version appended as the trailing operand via !con.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
6777
// Emit one intrinsic->instruction pattern per record collected in the MMA
// defsets (MMAs, WMMAs, MMA_LDSTs and LDMATRIXs, in that order).
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<mma>;
}
6781
// mapa instructions for the intrinsic `Intr`; `suffix` is spliced into the
// mnemonic right after "mapa". Four variants are produced: 32- and 64-bit $a
// (with a matching $d), each with $b taken from a register or an immediate.
multiclass MAPA<string suffix, Intrinsic Intr> {
  // 32-bit, register $b.
  def _32
      : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
                  "mapa" # suffix # ".u32\t$d, $a, $b;",
                  [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
  // 32-bit, immediate $b.
  def _32i
      : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
                  "mapa" # suffix # ".u32\t$d, $a, $b;",
                  [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit, register $b.
  def _64
      : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
                  "mapa" # suffix # ".u64\t$d, $a, $b;",
                  [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit, immediate $b.
  def _64i
      : NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
                  "mapa" # suffix # ".u64\t$d, $a, $b;",
                  [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
}
6800
// mapa without a suffix and with the ".shared::cluster" suffix.
defm mapa
    : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster
    : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6803
6804
// getctarank instructions for the intrinsic `Intr`; `suffix` is spliced into
// the mnemonic right after "getctarank". Two variants are produced, taking a
// 32-bit or a 64-bit $a; the result $d is a 32-bit register in both.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  // 32-bit $a.
  def _32
      : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                  "getctarank" # suffix # ".u32\t$d, $a;",
                  [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
  // 64-bit $a.
  def _64
      : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                  "getctarank" # suffix # ".u64\t$d, $a;",
                  [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
        Requires<[hasSM<90>, hasPTX<78>]>;
}
6815
// getctarank without a suffix and with the ".shared::cluster" suffix.
defm getctarank
    : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
6818
// Selects int_nvvm_is_explicit_cluster as a predicate move from
// %is_explicit_cluster.
def is_explicit_cluster
    : NVPTXInst<(outs Int1Regs:$d), (ins),
                "mov.pred\t$d, %is_explicit_cluster;",
                [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;
6823
// setmaxnreg inc/dec intrinsics. Everything inside the let-block is marked
// convergent.
let isConvergent = true in {
  // One anonymous instruction per instantiation. `Action` is the mnemonic
  // component following "setmaxnreg."; $reg_count is an immediate operand
  // (matched with timm).
  multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
    def : NVPTXInst<(outs), (ins i32imm:$reg_count),
                    "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
                    [(Intr timm:$reg_count)]>,
          Requires<[hasSM90a, hasPTX<80>]>;
  }

  defm INT_SET_MAXNREG_INC
      : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
  defm INT_SET_MAXNREG_DEC
      : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
} // isConvergent
6836