xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision b1879975794772ee51f0b4865753364c7d7626c3)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f (the == comparison also accepts -0.0f).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0 (the == comparison also accepts -0.0).
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// Predicate code fragments that check the address space of a memory node by
// calling ChkMemSDNodeAddressSpace with the corresponding ADDRESS_SPACE_*
// constant.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides a dag node that is replaced with the PTX version currently in use.
class PTX {
  // Ignores the value of the matched immediate and produces an i32 immediate
  // holding the subtarget's PTX version.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    SDLoc DL(N);
    return getI32Imm(Subtarget->getPTXVersion(), DL);
  }]>;

  // The (i32 0) operand is only a placeholder; PTXVerXform rewrites it to the
  // PTX version.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Generates a list of n sequential register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is built recursively: the names for n-1 followed by
// prefix # (n-1). Any n <= 0 yields the empty list, so a negative count
// terminates instead of recursing without bound.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(!gt(n, 0),
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}
58
// Lists the threadmask_imm values to iterate over for a shuffle variant:
// both false and true when sync is set, otherwise only false.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [false, true], [false]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// bar.sync with the literal barrier id 0.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// bar.sync with the barrier id taken from a register.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Barrier-0 reductions. Each one emits a braced PTX sequence that converts the
// i32 $pred into a predicate register with setp.ne.u32 and feeds it to
// bar.red on barrier 0.

// popc variant: bar.red.popc.u32 writes $dst directly.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// and variant: the predicate result %p2 is turned into 1/0 with selp.u32.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// or variant: the predicate result %p2 is turned into 1/0 with selp.u32.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync with an immediate operand.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, with the operand as an immediate (_I) or a register (_R).
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

// barrier.sync, with the operand as an immediate (_I) or a register (_R).
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
118
// barrier.sync with an id and a count. The two-letter suffix gives the operand
// kind of $id and then $cnt: R = Int32Regs register, I = i32 immediate.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
135
// Operand-less "barrier.cluster.<variant>;" instruction selected from the
// intrinsic Intr. Preds defaults to [hasPTX<78>, hasSM<90>] and may be
// overridden per variant.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>
    : NVPTXInst<(outs), (ins),
                !strconcat("barrier.cluster.", variant, ";"),
                [(Intr)]>,
      Requires<Preds>;
140
// Cluster barrier intrinsics. The relaxed forms pass hasPTX<80> instead of
// relying on the class default predicates.
def barrier_cluster_arrive
    : INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed
    : INT_BARRIER_CLUSTER<"arrive.relaxed",
                          int_nvvm_barrier_cluster_arrive_relaxed,
                          [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait
    : INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// 'aligned' versions of the cluster barrier intrinsics.
def barrier_cluster_arrive_aligned
    : INT_BARRIER_CLUSTER<"arrive.aligned",
                          int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned
    : INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
                          int_nvvm_barrier_cluster_arrive_relaxed_aligned,
                          [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned
    : INT_BARRIER_CLUSTER<"wait.aligned",
                          int_nvvm_barrier_cluster_wait_aligned>;
157
// One shfl / shfl.sync instruction variant.
//
// Template arguments:
//   sync           - emit the "sync." form and add a $threadmask operand.
//   mode           - shuffle mode string; it is spliced into both the
//                    intrinsic name and the asm mnemonic.
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - also produce an Int1Regs $pred result (intrinsic name
//                    gets a "p" suffix, asm prints "$dst|$pred").
//   offset_imm,
//   mask_imm,
//   threadmask_imm - whether the corresponding operand is an i32 immediate
//                    (true) or an Int32Regs register (false).
//
// The NVPTXInst base is instantiated with placeholder operands, asm string
// and pattern; all of them are overridden by the `let` statements below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of $src/$dst. Only "i32" and "f32" have a case here.
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  // Name of the intrinsic record:
  // int_nvvm_shfl_[sync_]<mode>_<reg>[p]
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  // Look the intrinsic record up by its computed name.
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Inputs, in order: [threadmask (sync only)], src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  // Outputs: $dst, plus $pred when return_pred is set.
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  // Asm: shfl.[sync.]<mode>.b32 $dst[|$pred], $src, $offset, $mask[, $threadmask];
  // Note that $threadmask is printed last although it is the first input.
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // The selection pattern is derived from the operand lists: the output dag
  // has `outs` replaced by `set`, the input dag has `ins` replaced by the
  // intrinsic, and i32imm operands are replaced by `imm` in both. The
  // rewritten input dag is wrapped in a `set` dag so that !con can append it
  // to the rewritten output dag.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}
197
// Instantiate every SHFL_INSTR combination. The threadmask immediate/register
// choice is only iterated for the sync forms (see THREADMASK_INFO). Sync forms
// require [hasSM<30>, hasPTX<60>]; the others require [hasSM<30>, hasSHFL].
foreach sync = [false, true] in
foreach mode = ["up", "down", "bfly", "idx"] in
foreach regclass = ["i32", "f32"] in
foreach return_pred = [false, true] in
foreach offset_imm = [false, true] in
foreach mask_imm = [false, true] in
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
  def : SHFL_INSTR<sync, mode, regclass, return_pred,
                   offset_imm, mask_imm, threadmask_imm>,
        Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
215
// vote.{all,any,uni,ballot}
// Takes an Int1Regs predicate and writes the result to a register of class
// regclass; mode supplies the mnemonic suffix, including the type.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
228
// vote.sync.{all,any,uni,ballot}
// Same as VOTE but with a leading $mask operand, available as an immediate
// (suffix i) or as an Int32Regs register (suffix r). $mask is printed last.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Both variants print identically.
  defvar VoteSyncAsm = !strconcat("vote.sync.", mode,
                                  " \t$dest, $pred, $mask;");

  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    VoteSyncAsm,
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    VoteSyncAsm,
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
245
// match.any.sync.<ptxtype>
// Produces an Int32Regs result from ($mask, $value). The two-letter suffix
// gives the operand kind of $value and then $mask (i = immediate,
// r = register), which is the order in which they are printed.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  // All four variants print identically.
  defvar MatchAnyAsm = !strconcat("match.any.sync.", ptxtype,
                                  " \t$dest, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     MatchAnyAsm,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     MatchAnyAsm,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     MatchAnyAsm,
                     [(set Int32Regs:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     MatchAnyAsm,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
265
// activemask.b32
// No inputs; the result is written to an Int32Regs register.
def ACTIVEMASK
    : NVPTXInst<(outs Int32Regs:$dest), (ins),
                "activemask.b32 \t$dest;",
                [(set Int32Regs:$dest, (int_nvvm_activemask))]>,
      Requires<[hasPTX<62>, hasSM<30>]>;
271
// 32-bit and 64-bit value instantiations of MATCH_ANY_SYNC.
defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
276
// match.all.sync.<ptxtype> with a predicate result.
// Produces an Int32Regs $dest and an Int1Regs $pred from ($mask, $value). The
// two-letter suffix gives the operand kind of $value and then $mask
// (i = immediate, r = register).
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  // All four variants print identically.
  defvar MatchAllAsm = !strconcat("match.all.sync.", ptxtype,
                                  " \t$dest|$pred, $value, $mask;");

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     MatchAllAsm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     MatchAllAsm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     MatchAllAsm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     MatchAllAsm,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
304
// redux.sync.<BinOp>.<PTXType>
// Both $src and $mask are Int32Regs registers; the result is Int32Regs.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
320
321} // isConvergent = true
322
323//-----------------------------------
324// Explicit Memory Fence Functions
325//-----------------------------------
// Operand-less instruction whose complete asm text is StrOp, selected from
// the operand-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

// The cluster fence is the only one here that carries predicates.
def INT_FENCE_SC_CLUSTER
    : MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
      Requires<[hasPTX<78>, hasSM<90>]>;
337
338//-----------------------------------
339// Async Copy Functions
340//-----------------------------------
341
// cp.async.mbarrier.arrive[NoInc][AddrSpace].b64 [$addr];
// _32 takes the address in Int32Regs, _64 in Int64Regs.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  defvar CpAsyncArriveAsm =
      "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      CpAsyncArriveAsm,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      CpAsyncArriveAsm,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
361
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
// Intrin selects the two-operand forms; IntrinS selects the forms with the
// extra $src_size operand. _32/_64 give the register class of the addresses;
// a trailing "s" means $src_size is a register and "si" an immediate.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize,
                                    Intrinsic Intrin, Intrinsic IntrinS> {
  // Text shared by every variant, up to and including the copy size.
  defvar CpAsyncHead =
      "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize;
  defvar CpAsyncAsm     = CpAsyncHead # ";";
  defvar CpAsyncSizeAsm = CpAsyncHead # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      CpAsyncAsm,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      CpAsyncAsm,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variants with the src_size parameter.
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src,
                            Int32Regs:$src_size),
                       CpAsyncSizeAsm,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si: NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                       CpAsyncSizeAsm,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                 imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src,
                            Int32Regs:$src_size),
                       CpAsyncSizeAsm,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si: NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                       CpAsyncSizeAsm,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                 imm:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                               int_nvvm_cp_async_ca_shared_global_4,
                               int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                               int_nvvm_cp_async_ca_shared_global_8,
                               int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                               int_nvvm_cp_async_ca_shared_global_16,
                               int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                               int_nvvm_cp_async_cg_shared_global_16,
                               int_nvvm_cp_async_cg_shared_global_16_s>;
405
// cp.async group management: commit_group, wait_group <n> and wait_all.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// $n is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
419
// cp.async.bulk variants of the commit/wait group instructions.
def CP_ASYNC_BULK_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.bulk.commit_group;",
                [(int_nvvm_cp_async_bulk_commit_group)]>,
      Requires<[hasPTX<80>, hasSM<90>]>;

// In both wait forms $n is matched as a target immediate (timm).
def CP_ASYNC_BULK_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.bulk.wait_group $n;",
                [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP_READ
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.bulk.wait_group.read $n;",
                [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
      Requires<[hasPTX<80>, hasSM<90>]>;
435
436//-----------------------------------
437// MBarrier Functions
438//-----------------------------------
439
// mbarrier.init[AddrSpace].b64 [$addr], $count;
// _32 takes the address in Int32Regs, _64 in Int64Regs; $count is always an
// Int32Regs register.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  defvar MbarInitAsm = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      MbarInitAsm,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      MbarInitAsm,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
454
// mbarrier.inval[AddrSpace].b64 [$addr];
// _32 takes the address in Int32Regs, _64 in Int64Regs.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  defvar MbarInvalAsm = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      MbarInvalAsm,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      MbarInvalAsm,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
469
// mbarrier.arrive[AddrSpace].b64 $state, [$addr];
// Returns $state in Int64Regs. _32 takes the address in Int32Regs, _64 in
// Int64Regs.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  defvar MbarArriveAsm =
      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      MbarArriveAsm,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      MbarArriveAsm,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
484
// mbarrier.arrive.noComplete[AddrSpace].b64 $state, [$addr], $count;
// Returns $state in Int64Regs. _32 takes the address in Int32Regs, _64 in
// Int64Regs; $count is always an Int32Regs register.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar MbarArriveNCAsm = "mbarrier.arrive.noComplete" # AddrSpace
                           # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      MbarArriveNCAsm,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      MbarArriveNCAsm,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
504
// mbarrier.arrive_drop[AddrSpace].b64 $state, [$addr];
// Returns $state in Int64Regs. _32 takes the address in Int32Regs, _64 in
// Int64Regs.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  defvar MbarArriveDropAsm =
      "mbarrier.arrive_drop" # AddrSpace # ".b64 $state, [$addr];";

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      MbarArriveDropAsm,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      MbarArriveDropAsm,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
522
// mbarrier.arrive_drop.noComplete[AddrSpace].b64 $state, [$addr], $count;
// Returns $state in Int64Regs. _32 takes the address in Int32Regs, _64 in
// Int64Regs; $count is always an Int32Regs register.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace,
                                           Intrinsic Intrin> {
  defvar MbarArriveDropNCAsm = "mbarrier.arrive_drop.noComplete" # AddrSpace
                               # ".b64 $state, [$addr], $count;";

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      MbarArriveDropNCAsm,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      MbarArriveDropNCAsm,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
543
// mbarrier.test_wait[AddrSpace].b64 $res, [$addr], $state;
// Returns $res in Int1Regs. _32 takes the address in Int32Regs, _64 in
// Int64Regs; $state is always an Int64Regs register.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  defvar MbarTestWaitAsm =
      "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;";

  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      MbarTestWaitAsm,
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      MbarTestWaitAsm,
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
559
// mbarrier.pending_count.b64 $res, $state;
// Takes an Int64Regs $state and produces an Int32Regs result.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
568
569//-----------------------------------
570// Math Functions
571//-----------------------------------
572
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// The same applies to fmax and fmin.
578
// f32: fmin(1.0, fmax(0.0, a)) in each of the four operand orders is selected
// as CVT_f32_f32 with CvtSAT.
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
591
// f64: fmin(1.0, fmax(0.0, a)) in each of the four operand orders is selected
// as CVT_f64_f64 with CvtSAT.
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
604
605
// One-operand intrinsic instruction: $dst = IntOP($src0).
// OpcStr is the complete asm string rather than just a mnemonic, because
// cases like INT_PTX_RECIP need full control over it.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;
614
// Two-operand intrinsic instruction: $dst = IntOP($src0, $src1).
// OpcStr is the complete asm string rather than just a mnemonic, because
// cases like INT_PTX_NATIVE_POWR_F need full control over it.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
      Requires<Preds>;
625
// Three-operand intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete asm string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1,
                     s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>,
      Requires<Preds>;
635
636//
637// MISC
638//
639
// prmt.b32 with three Int32Regs sources.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

// nanosleep.u32, with the operand as an immediate (_I) or a register (_R).
def INT_NVVM_NANOSLEEP_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep imm:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;
def INT_NVVM_NANOSLEEP_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep Int32Regs:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;
649//
650// Min Max
651//
652
// f32 min variants. The plain and .ftz forms carry no predicates; the .NaN
// forms require [hasPTX<70>, hasSM<80>] and the .xorsign.abs forms require
// [hasPTX<72>, hasSM<86>].
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F
    : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F
    : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_f,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
679
680def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
681  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
682def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
683  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
684def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
685  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
686  [hasPTX<70>, hasSM<80>]>;
687def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
688  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
689  [hasPTX<70>, hasSM<80>]>;
690def INT_NVVM_FMAX_XORSIGN_ABS_F :
691  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
692    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
693    [hasPTX<72>, hasSM<86>]>;
694def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
695  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
696    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
697    [hasPTX<72>, hasSM<86>]>;
698def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
699  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
700    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
701    [hasPTX<72>, hasSM<86>]>;
702def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
703  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
704    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
705    [hasPTX<72>, hasSM<86>]>;
706
707def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
708  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
709def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
710  Float64Regs, Float64Regs, int_nvvm_fmax_d>;
711
712//
713// Min Max f16, f16x2, bf16, bf16x2
714//
715
// Bundles the per-variant data consumed by the MIN_MAX multiclass: the record
// name suffix, the intrinsic to select, the register class used for the result
// and both sources, and the predicates guarding the instruction (defaulting to
// hasPTX<70> and hasSM<80>).
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
  // Suffix appended to the defm prefix; also the source of the asm modifiers.
  string Variant = V;
  // Intrinsic matched by the generated instruction.
  Intrinsic Intr = I;
  // Register class of $dst, $src0 and $src1.
  NVPTXRegClass RegClass = RC;
  // Predicates passed on to F_MATH_2.
  list<Predicate> Predicates = Preds;
}
723
// Emits one F_MATH_2 instruction for each f16, f16x2, bf16 and bf16x2 variant
// of the operation named by IntName ("min" or anything else, which selects the
// fmax intrinsics). Each record is named <defm prefix><Variant>; its assembly
// mnemonic is IntName followed by Variant with every "_" replaced by ".".
multiclass MIN_MAX<string IntName> {
  // Chooses between the fmin and fmax intrinsic of every pair listed below.
  defvar IsMin = !eq(IntName, "min");

  foreach P = [
    // f16
    MIN_MAX_TUPLE<"_f16",
                  !if(IsMin, int_nvvm_fmin_f16, int_nvvm_fmax_f16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
                  !if(IsMin, int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
                  !if(IsMin, int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
                  !if(IsMin, int_nvvm_fmin_ftz_nan_f16,
                      int_nvvm_fmax_ftz_nan_f16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
                  !if(IsMin, int_nvvm_fmin_xorsign_abs_f16,
                      int_nvvm_fmax_xorsign_abs_f16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
                  !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16,
                      int_nvvm_fmax_ftz_xorsign_abs_f16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
                  !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16,
                      int_nvvm_fmax_nan_xorsign_abs_f16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
                  !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
                      int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // f16x2
    MIN_MAX_TUPLE<"_f16x2",
                  !if(IsMin, int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
                  !if(IsMin, int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
                  !if(IsMin, int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
                  !if(IsMin, int_nvvm_fmin_ftz_nan_f16x2,
                      int_nvvm_fmax_ftz_nan_f16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
                  !if(IsMin, int_nvvm_fmin_xorsign_abs_f16x2,
                      int_nvvm_fmax_xorsign_abs_f16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
                  !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16x2,
                      int_nvvm_fmax_ftz_xorsign_abs_f16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
                  !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16x2,
                      int_nvvm_fmax_nan_xorsign_abs_f16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
                  !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
                      int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16
    MIN_MAX_TUPLE<"_bf16",
                  !if(IsMin, int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
                  !if(IsMin, int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
                  Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
                  !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16,
                      int_nvvm_fmax_xorsign_abs_bf16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
                  !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16,
                      int_nvvm_fmax_nan_xorsign_abs_bf16),
                  Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16x2
    MIN_MAX_TUPLE<"_bf16x2",
                  !if(IsMin, int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
                  !if(IsMin, int_nvvm_fmin_nan_bf16x2,
                      int_nvvm_fmax_nan_bf16x2),
                  Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
                  !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16x2,
                      int_nvvm_fmax_xorsign_abs_bf16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
                  !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16x2,
                      int_nvvm_fmax_nan_xorsign_abs_bf16x2),
                  Int32Regs, [hasPTX<72>, hasSM<86>]>
  ] in {
    // The same register class is used for the destination and both sources.
    def P.Variant
      : F_MATH_2<IntName # !subst("_", ".", P.Variant)
                   # " \t$dst, $src0, $src1;",
                 P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}
793
// Instantiate the f16/f16x2/bf16/bf16x2 min and max instruction families.
defm INT_NVVM_FMIN
  : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX". The prefix is kept as-is
// because it determines the names of the generated records; confirm nothing
// refers to INT_NVVM_FMAN_* before renaming.
defm INT_NVVM_FMAN
  : MIN_MAX<"max">;
796
797//
798// Multiplication
799//
800
// mul.hi for signed/unsigned 16-, 32- and 64-bit integer registers.
def INT_NVVM_MULHI_S
  : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;",
             Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
def INT_NVVM_MULHI_US
  : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;",
             Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit .rn/.rz/.rm/.rp modifier, with and without
// .ftz.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit .rn/.rz/.rm/.rp modifier.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on signed/unsigned 32-bit registers.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
844
845//
846// Div
847//
848
// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit .rn/.rz/.rm/.rp modifier, with and without
// .ftz.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit .rn/.rz/.rm/.rp modifier.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
880
881//
882// Sad
883//
884
// Three-operand sad for signed/unsigned 16-, 32- and 64-bit integer registers.
def INT_NVVM_SAD_S
  : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
             Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
def INT_NVVM_SAD_US
  : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
             Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
def INT_NVVM_SAD_LL
  : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
             Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
def INT_NVVM_SAD_ULL
  : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
             Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
897
898//
899// Floor  Ceil
900//
901
// floor: selected as a same-type CVT with the CvtRMI / CvtRMI_FTZ mode operand.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$src),
          (CVT_f64_f64 Float64Regs:$src, CvtRMI)>;

// ceil: selected as a same-type CVT with the CvtRPI / CvtRPI_FTZ mode operand.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$src),
          (CVT_f64_f64 Float64Regs:$src, CvtRPI)>;
915
916//
917// Abs
918//
919
// abs for f32 (with and without .ftz) and f64.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;
927
928//
929// Abs, Neg bf16, bf16x2
930//
931
// bf16 values use Int16Regs and bf16x2 values use Int32Regs. All four
// instructions require hasPTX<70> and hasSM<80>.
def INT_NVVM_ABS_BF16
  : F_MATH_1<"abs.bf16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_abs_bf16,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2
  : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_abs_bf16x2,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16
  : F_MATH_1<"neg.bf16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_neg_bf16,
             [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2
  : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_neg_bf16x2,
             [hasPTX<70>, hasSM<80>]>;
940
941//
942// Round
943//
944
// round: selected as a same-type CVT with the CvtRNI / CvtRNI_FTZ mode operand.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$src),
          (CVT_f64_f64 Float64Regs:$src, CvtRNI)>;
951
952//
953// Trunc
954//
955
// trunc: selected as a same-type CVT with the CvtRZI / CvtRZI_FTZ mode operand.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$src),
          (CVT_f64_f64 Float64Regs:$src, CvtRZI)>;
962
963//
964// Saturate
965//
966
// saturate: selected as a same-type CVT with the CvtSAT / CvtSAT_FTZ mode
// operand.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$src),
          (CVT_f32_f32 Float32Regs:$src, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$src),
          (CVT_f64_f64 Float64Regs:$src, CvtSAT)>;
973
974//
975// Exp2  Log2
976//
977
// ex2.approx for f32 (with and without .ftz), f64, f16 and f16x2. The f16 and
// f16x2 forms require hasPTX<70> and hasSM<75>.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16
  : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
             Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16,
             [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2
  : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
             Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2,
             [hasPTX<70>, hasSM<75>]>;

// lg2.approx for f32 (with and without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
995
996//
997// Sin  Cos
998//
999
// f32 sin.approx and cos.approx, each with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
1009
1010//
1011// Fma
1012//
1013
// Bundles the per-variant data consumed by the FMA_INST multiclass: the record
// name suffix, the intrinsic to select, the register class used for the result
// and all three sources, and the predicates guarding the instruction (none by
// default).
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  // Suffix appended to the defm prefix; also the source of the asm modifiers.
  string Variant = V;
  // Intrinsic matched by the generated instruction.
  Intrinsic Intr = I;
  // Register class of $dst, $src0, $src1 and $src2.
  NVPTXRegClass RegClass = RC;
  // Predicates passed on to F_MATH_3.
  list<Predicate> Predicates = Preds;
}
1021
// Emits one F_MATH_3 instruction per fma variant listed below. Each record is
// named <defm prefix><Variant>; its assembly mnemonic is "fma" followed by
// Variant with every "_" replaced by ".".
multiclass FMA_INST {
  foreach P = [
    // f64: no predicates.
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32: no predicates.
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 in Int16Regs.
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16 in Int16Regs.
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // f16x2 and bf16x2 in Int32Regs.
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>
  ] in {
    // The same register class is used for the destination and all sources.
    def P.Variant
      : F_MATH_3<"fma" # !subst("_", ".", P.Variant)
                   # " \t$dst, $src0, $src1, $src2;",
                 P.RegClass, P.RegClass, P.RegClass, P.RegClass,
                 P.Intr, P.Predicates>;
  }
}
1085
// Instantiate every fma variant; records are named INT_NVVM_FMA<Variant>.
defm INT_NVVM_FMA
  : FMA_INST;
1087
1088//
1089// Rcp
1090//
1091
// f32 rcp with an explicit .rn/.rz/.rm/.rp modifier, with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp with an explicit .rn/.rz/.rm/.rp modifier.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// rcp.approx.ftz for f32 and f64.
def INT_NVVM_RCP_APPROX_FTZ_F
  : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1122
1123//
1124// Sqrt
1125//
1126
// f32 sqrt with an explicit .rn/.rz/.rm/.rp modifier or .approx, each with and
// without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt with an explicit .rn/.rz/.rm/.rp modifier.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
1156
// Selection of the generic int_nvvm_sqrt_f intrinsic. The four patterns map it
// to one of the f32 sqrt instructions depending on the doF32FTZ and
// do_SQRTF32_RN predicates; they are listed from the most constrained to the
// unconditional sqrt.approx fallback, and that order is kept deliberately.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$src)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_RN_F Float32Regs:$src)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$src)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$src),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$src)>;
1166
1167//
1168// Rsqrt
1169//
1170
// rsqrt.approx for f32 and f64, with .ftz ...
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_FTZ_D
  : F_MATH_1<"rsqrt.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_ftz_d>;

// ... and without it.
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1182
// Fold (fdiv FloatConst1, sqrt(x)) into a single rsqrt.approx instruction.
// Every pattern below requires doRsqrtOpt.

// Explicit approximate sqrt intrinsics: 1.0f / sqrt_approx -> rsqrt_approx.
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt]>;

// The same fold for int_nvvm_sqrt_f when non-precise sqrt is requested
// (do_SQRTF32_APPROX); doNoF32FTZ / doF32FTZ picks the plain or .ftz form.
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;

// The same fold for the generic fsqrt node, under the same predicates.
def : Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$src)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$src)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
1204//
1205// Add
1206//
1207
// f32 add with an explicit .rn/.rz/.rm/.rp modifier, with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with an explicit .rn/.rz/.rm/.rp modifier.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1233
1234//
1235// Convert
1236//
1237
1238def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
1239          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
1240def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
1241          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
1242def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
1243          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
1244def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
1245          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
1246def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
1247          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
1248def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
1249          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
1250def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
1251          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
1252def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
1253          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
1254
1255def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
1256          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
1257def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
1258          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
1259def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
1260          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
1261def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
1262          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
1263
1264def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
1265          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
1266def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
1267          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
1268def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
1269          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
1270def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
1271          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
1272
1273def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
1274          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
1275def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
1276          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
1277def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
1278          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
1279def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
1280          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
1281
1282def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
1283          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
1284def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
1285          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
1286def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
1287          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
1288def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
1289          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
1290
// int_nvvm_f2i_*: select CVT_s32_f32 on a Float32Regs source. Each "_ftz"
// intrinsic is paired with the Cvt*I_FTZ mode operand; the plain intrinsic
// uses the corresponding Cvt*I operand.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1307
// int_nvvm_f2ui_*: select CVT_u32_f32 on a Float32Regs source. Each "_ftz"
// intrinsic is paired with the Cvt*I_FTZ mode operand; the plain intrinsic
// uses the corresponding Cvt*I operand.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1324
// int_nvvm_i2f_{rn,rz,rm,rp}: select CVT_f32_s32 on an Int32Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1333
// int_nvvm_ui2f_{rn,rz,rm,rp}: select CVT_f32_u32 on an Int32Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1342
// int_nvvm_ff2bf16x2_*: two Float32Regs sources are forwarded, in the same
// order, to CVT_bf16x2_f32 together with the CvtRN / CvtRZ mode operand (or
// its _RELU variant for the "_relu" intrinsics).
def : Pat<
  (int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
  (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
  (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<
  (int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1351
// int_nvvm_ff2f16x2_*: two Float32Regs sources are forwarded, in the same
// order, to CVT_f16x2_f32 together with the CvtRN / CvtRZ mode operand (or
// its _RELU variant for the "_relu" intrinsics).
def : Pat<
  (int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
  (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<
  (int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
  (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<
  (int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
  (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<
  (int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
  (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1360
// int_nvvm_f2bf16_*: select CVT_bf16_f32 on a Float32Regs source with the
// CvtRN / CvtRZ mode operand (or its _RELU variant for "_relu" intrinsics).
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a), (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
1369
// Instruction for int_nvvm_f2tf32_rna: prints "cvt.rna.tf32.f32" with a
// Float32Regs source and an Int32Regs destination.
def CVT_tf32_f32 : NVPTXInst<
  (outs Int32Regs:$dest), (ins Float32Regs:$a),
  "cvt.rna.tf32.f32 \t$dest, $a;",
  [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
1374
// int_nvvm_lohi_i2d: a single mov.b64 that builds a Float64Regs result from
// two Int32Regs operands; $src0 and $src1 are printed in that order inside the
// PTX vector operand.
def INT_NVVM_LOHI_I2D : F_MATH_2<
  "mov.b64 \t$dst, {{$src0, $src1}};",
  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1377
// int_nvvm_d2i_lo: inside a braced PTX scope, declares a scratch .b32 register
// %temp and unpacks the Float64Regs source with mov.b64 so that $dst receives
// the first element of the vector operand and %temp the second.
def INT_NVVM_D2I_LO : F_MATH_1<
  "{{\n\t" #
  ".reg .b32 %temp; \n\t" #
  "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
  "}}",
  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: inside a braced PTX scope, declares a scratch .b32 register
// %temp and unpacks the Float64Regs source with mov.b64 so that %temp receives
// the first element of the vector operand and $dst the second.
def INT_NVVM_D2I_HI : F_MATH_1<
  "{{\n\t" #
  ".reg .b32 %temp; \n\t" #
  "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
  "}}",
  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1390
// int_nvvm_f2ll_*: select CVT_s64_f32 on a Float32Regs source. Each "_ftz"
// intrinsic is paired with the Cvt*I_FTZ mode operand; the plain intrinsic
// uses the corresponding Cvt*I operand.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
1407
// int_nvvm_f2ull_*: select CVT_u64_f32 on a Float32Regs source. Each "_ftz"
// intrinsic is paired with the Cvt*I_FTZ mode operand; the plain intrinsic
// uses the corresponding Cvt*I operand.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1424
// int_nvvm_d2ll_{rn,rz,rm,rp}: select CVT_s64_f64 on a Float64Regs source with
// the matching CvtRNI / CvtRZI / CvtRMI / CvtRPI mode operand.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
1433
// int_nvvm_d2ull_{rn,rz,rm,rp}: select CVT_u64_f64 on a Float64Regs source
// with the matching CvtRNI / CvtRZI / CvtRMI / CvtRPI mode operand.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1442
// int_nvvm_ll2f_{rn,rz,rm,rp}: select CVT_f32_s64 on an Int64Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
1451
// int_nvvm_ull2f_{rn,rz,rm,rp}: select CVT_f32_u64 on an Int64Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1460
// int_nvvm_ll2d_{rn,rz,rm,rp}: select CVT_f64_s64 on an Int64Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
1469
// int_nvvm_ull2d_{rn,rz,rm,rp}: select CVT_f64_u64 on an Int64Regs source with
// the matching CvtRN / CvtRZ / CvtRM / CvtRP mode operand.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1478
1479
// int_nvvm_f2h_rn[_ftz]: select CVT_f16_f32 on a Float32Regs source; the
// "_ftz" intrinsic uses CvtRN_FTZ and the plain one uses CvtRN.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
1484
1485//
1486// Bitcast
1487//
1488
// 32-bit bitcast intrinsics: both directions are printed as a plain mov.b32
// between a Float32Regs and an Int32Regs register.
def INT_NVVM_BITCAST_F2I : F_MATH_1<
  "mov.b32 \t$dst, $src0;", Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<
  "mov.b32 \t$dst, $src0;", Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;
1493
// 64-bit bitcast intrinsics: both directions are printed as a plain mov.b64
// between an Int64Regs and a Float64Regs register.
def INT_NVVM_BITCAST_LL2D : F_MATH_1<
  "mov.b64 \t$dst, $src0;", Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<
  "mov.b64 \t$dst, $src0;", Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
1498
1499//
1500// FNS
1501//
1502
// Common shape of every fns.b32 instruction variant.
//   ins      - input operand dag; the asm string refers to $mask, $base and
//              $offset, so the dag is expected to name its operands that way.
//   Operands - source dag of the selection pattern whose result is set into
//              $dst.
// Every variant requires PTX 6.0 and SM 3.0 (hasPTX<60>, hasSM<30>).
class INT_FNS_MBO<dag ins, dag Operands> :
    NVPTXInst<(outs Int32Regs:$dst), ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX<60>, hasSM<30>]>;
1508
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the kind of $mask, $base and $offset in that order:
// 'r' = Int32Regs register, 'i' = i32imm operand matched with imm.
def INT_FNS_rrr : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<
  (ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<
  (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
  (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
  (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1525
1526//-----------------------------------
1527// Atomic Functions
1528//-----------------------------------
1529
// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global, shared or generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag> : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag> : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag> : PatFrag<ops, frag, AS_match.generic>;
1536
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrT / ptrclass - value type and register class of the address operand
//   regT / regclass - value type and register class of the value and result
//   SpaceStr, OpcStr, TypeStr - pasted after "atom", in that order, to form
//                     the mnemonic
//   IntOp           - fragment matched by the selection pattern
//   IMMType / IMM   - operand type and node used by the immediate form
//   Pred            - predicates required by the instruction
// Emits "reg" (value in a register) and "imm" (value as an immediate). When
// TypeStr is ".f16" or ".bf16" the immediate form is given the single
// predicate Predicate<"false"> instead of Pred.
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
      Requires<Pred>;

  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
      Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")),
                   [Predicate<"false">],
                   Pred)>;
}
// Instantiates F_ATOMIC_2_imp twice: "p32" with an i32 / Int32Regs address and
// "p64" with an i64 / Int64Regs address. All other arguments are forwarded
// unchanged; Pred defaults to the empty list.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, IMM, Pred>;
}
1558
// Two-operand atomic whose value operand is negated before the atomic is
// issued. The emitted text opens a braced PTX scope, declares a scratch
// register "temp" of type .s<TypeStr>, writes neg.s<TypeStr> of $b into it and
// then issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with temp.
// TypeStr is therefore the bare width ("32" / "64"), without a leading dot.
// Only a register form ("reg") is defined.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "{{ \n\t" #
                ".reg \t.s" # TypeStr # " temp; \n\t" #
                "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                "atom" # SpaceStr # OpcStr # ".u" # TypeStr # " \t$dst, [$addr], temp; \n\t" #
                "}}",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp twice: "p32" with an i32 / Int32Regs address
// and "p64" with an i64 / Int64Regs address. Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}
1581
// Three-operand atomic (address + two values, $b and $c) for a single pointer
// width. SpaceStr, OpcStr and TypeStr are pasted after "atom", in that order,
// to form the mnemonic. Four forms are emitted:
//   reg  - $b and $c in registers
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b and $c immediate
// Immediate operands use IMMType in the operand list and are matched with the
// imm node in the pattern.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b), (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             imm:$b, (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b), imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp twice: "p32" with an i32 / Int32Regs address and
// "p64" with an i64 / Int64Regs address. Pred defaults to the empty list.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
}
1618
1619// atom_add
1620
// Address-space-checked fragments for atomic add. The _g / _s / _gen suffix
// selects the global / shared / generic check. The i32 and i64 fragments wrap
// atomic_load_add_i32 / atomic_load_add_i64; the untyped atomic_load_add_*
// fragments wrap atomic_load_fadd.
def atomic_load_add_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i32 node:$a, node:$b)>;
def atomic_load_add_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i32 node:$a, node:$b)>;
def atomic_load_add_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i32 node:$a, node:$b)>;
def atomic_load_add_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i64 node:$a, node:$b)>;
def atomic_load_add_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i64 node:$a, node:$b)>;
def atomic_load_add_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_add_i64 node:$a, node:$b)>;
def atomic_load_add_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
1639
// 32-bit integer atomic add (".add" with type ".u32"). The *_USE_G variant
// matches the generic-address-space fragment but prints the ".global" space.
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".add", atomic_load_add_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".add", atomic_load_add_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_i32_gen, i32imm, imm>;
1648
// 64-bit integer atomic add (".add" with type ".u64"). The *_USE_G variant
// matches the generic-address-space fragment but prints the ".global" space.
defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".add", atomic_load_add_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".add", atomic_load_add_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_i64_gen, i64imm, imm>;
1657
// f16 atomic add (".add.noftz" with type ".f16"), held in Int16Regs and
// predicated on hasSM<70> and hasPTX<63>.
defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<
  f16, Int16Regs, ".global", ".f16", ".add.noftz", atomic_load_add_g, f16imm, fpimm,
  [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<
  f16, Int16Regs, ".shared", ".f16", ".add.noftz", atomic_load_add_s, f16imm, fpimm,
  [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<
  f16, Int16Regs, "", ".f16", ".add.noftz", atomic_load_add_gen, f16imm, fpimm,
  [hasSM<70>, hasPTX<63>]>;
1664
// bf16 atomic add (".add.noftz" with type ".bf16"), held in Int16Regs and
// predicated on hasSM<90> and hasPTX<78>.
defm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<
  bf16, Int16Regs, ".global", ".bf16", ".add.noftz", atomic_load_add_g, bf16imm, fpimm,
  [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<
  bf16, Int16Regs, ".shared", ".bf16", ".add.noftz", atomic_load_add_s, bf16imm, fpimm,
  [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<
  bf16, Int16Regs, "", ".bf16", ".add.noftz", atomic_load_add_gen, bf16imm, fpimm,
  [hasSM<90>, hasPTX<78>]>;
1671
// f32 atomic add (".add" with type ".f32") on Float32Regs; no extra
// predicates are passed.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
  f32, Float32Regs, ".global", ".f32", ".add", atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
  f32, Float32Regs, ".shared", ".f32", ".add", atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
  f32, Float32Regs, "", ".f32", ".add", atomic_load_add_gen, f32imm, fpimm>;
1678
// f64 atomic add (".add" with type ".f64") on Float64Regs, predicated on
// hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<
  f64, Float64Regs, ".global", ".f64", ".add", atomic_load_add_g, f64imm, fpimm,
  [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
  f64, Float64Regs, ".shared", ".f64", ".add", atomic_load_add_s, f64imm, fpimm,
  [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
  f64, Float64Regs, "", ".f64", ".add", atomic_load_add_gen, f64imm, fpimm,
  [hasAtomAddF64]>;
1685
1686// atom_sub
1687
// Address-space-checked fragments for atomic subtract, wrapping
// atomic_load_sub_i32 / atomic_load_sub_i64 with the global (_g), shared (_s)
// or generic (_gen) check.
def atomic_load_sub_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i32 node:$a, node:$b)>;
def atomic_load_sub_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i32 node:$a, node:$b)>;
def atomic_load_sub_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i32 node:$a, node:$b)>;
def atomic_load_sub_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i64 node:$a, node:$b)>;
def atomic_load_sub_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i64 node:$a, node:$b)>;
def atomic_load_sub_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_sub_i64 node:$a, node:$b)>;
1700
// Atomic subtract, built from F_ATOMIC_2_NEG with opcode ".add": the value
// operand is negated and then atomically added. TypeStr is the bare width
// ("32" / "64"). The *_USE_G variants match the generic-address-space fragment
// but print the ".global" space.
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_i32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_i64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, "", "32", ".add", atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
  i32, Int32Regs, ".shared", "32", ".add", atomic_load_sub_i32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".shared", "64", ".add", atomic_load_sub_i64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
  i64, Int64Regs, "", "64", ".add", atomic_load_sub_i64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<
  i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_i64_gen>;
1717
1718// atom_swap
1719
// Address-space-checked fragments for atomic exchange, wrapping
// atomic_swap_i32 / atomic_swap_i64 with the global (_g), shared (_s) or
// generic (_gen) check.
def atomic_swap_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_swap_i32 node:$a, node:$b)>;
def atomic_swap_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_swap_i32 node:$a, node:$b)>;
def atomic_swap_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_swap_i32 node:$a, node:$b)>;
def atomic_swap_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_swap_i64 node:$a, node:$b)>;
def atomic_swap_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_swap_i64 node:$a, node:$b)>;
def atomic_swap_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_swap_i64 node:$a, node:$b)>;
1732
// Atomic exchange (".exch" with type ".b32" / ".b64"). The *_USE_G variants
// match the generic-address-space fragment but print the ".global" space.
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".b32", ".exch", atomic_swap_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".b32", ".exch", atomic_swap_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".b64", ".exch", atomic_swap_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".b64", ".exch", atomic_swap_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_i64_gen, i64imm, imm>;
1749
1750// atom_max
1751
// Address-space-checked fragments for atomic max, wrapping
// atomic_load_max_i32 / _i64 and atomic_load_umax_i32 / _i64 with the global
// (_g), shared (_s) or generic (_gen) check.
def atomic_load_max_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i32 node:$a, node:$b)>;
def atomic_load_max_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i32 node:$a, node:$b)>;
def atomic_load_max_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i32 node:$a, node:$b)>;
def atomic_load_max_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i64 node:$a, node:$b)>;
def atomic_load_max_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i64 node:$a, node:$b)>;
def atomic_load_max_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_max_i64 node:$a, node:$b)>;
def atomic_load_umax_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i32 node:$a, node:$b)>;
def atomic_load_umax_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i32 node:$a, node:$b)>;
def atomic_load_umax_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i32 node:$a, node:$b)>;
def atomic_load_umax_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i64 node:$a, node:$b)>;
def atomic_load_umax_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i64 node:$a, node:$b)>;
def atomic_load_umax_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_umax_i64 node:$a, node:$b)>;
1776
// Atomic max (".max"). The MAX variants print the ".s32" / ".s64" type and the
// UMAX variants print ".u32" / ".u64". All 64-bit variants are predicated on
// hasSM<32>. The *_USE_G variants match the generic-address-space fragment but
// print the ".global" space.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".s32", ".max", atomic_load_max_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".s32", ".max", atomic_load_max_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_i64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".s64", ".max", atomic_load_max_i64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".s64", ".max", atomic_load_max_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".max", atomic_load_umax_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".max", atomic_load_umax_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_i64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".max", atomic_load_umax_i64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".max", atomic_load_umax_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_i64_gen, i64imm, imm,
  [hasSM<32>]>;
1809
1810// atom_min
1811
// Address-space-checked fragments for atomic min, wrapping
// atomic_load_min_i32 / _i64 and atomic_load_umin_i32 / _i64 with the global
// (_g), shared (_s) or generic (_gen) check.
def atomic_load_min_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i32 node:$a, node:$b)>;
def atomic_load_min_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i32 node:$a, node:$b)>;
def atomic_load_min_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i32 node:$a, node:$b)>;
def atomic_load_min_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i64 node:$a, node:$b)>;
def atomic_load_min_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i64 node:$a, node:$b)>;
def atomic_load_min_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_min_i64 node:$a, node:$b)>;
def atomic_load_umin_i32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i32 node:$a, node:$b)>;
def atomic_load_umin_i32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i32 node:$a, node:$b)>;
def atomic_load_umin_i32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i32 node:$a, node:$b)>;
def atomic_load_umin_i64_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i64 node:$a, node:$b)>;
def atomic_load_umin_i64_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i64 node:$a, node:$b)>;
def atomic_load_umin_i64_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (atomic_load_umin_i64 node:$a, node:$b)>;
1836
// Atomic min (".min"). The MIN variants print the ".s32" / ".s64" type and the
// UMIN variants print ".u32" / ".u64". All 64-bit variants are predicated on
// hasSM<32>. The *_USE_G variants match the generic-address-space fragment but
// print the ".global" space.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".s32", ".min", atomic_load_min_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".s32", ".min", atomic_load_min_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_i64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".s64", ".min", atomic_load_min_i64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".s64", ".min", atomic_load_min_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".min", atomic_load_umin_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".min", atomic_load_umin_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_i64_g, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
  i64, Int64Regs, ".shared", ".u64", ".min", atomic_load_umin_i64_s, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
  i64, Int64Regs, "", ".u64", ".min", atomic_load_umin_i64_gen, i64imm, imm,
  [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
  i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_i64_gen, i64imm, imm,
  [hasSM<32>]>;
1869
1870// atom_inc  atom_dec
1871
// Address-space-checked fragments for atomic inc / dec. Unlike the other
// atomic fragments in this section, these wrap the
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32 intrinsics.
def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<
  (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1884
// 32-bit atomic inc / dec (".inc" / ".dec" with type ".u32"). The *_USE_G
// variants match the generic-address-space fragment but print the ".global"
// space.
defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".inc", atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<
  i32, Int32Regs, ".shared", ".u32", ".dec", atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<
  i32, Int32Regs, "", ".u32", ".dec", atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<
  i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1901
1902// atom_and
1903
// Fragments wrapping atomic_load_and_{i32,i64}, one per checker class
// (global / shared / generic).  Operands $a and $b pass through unchanged.
def atomic_load_and_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i32 node:$a, node:$b)>;
def atomic_load_and_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i32 node:$a, node:$b)>;
def atomic_load_and_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_i32 node:$a, node:$b)>;
def atomic_load_and_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i64 node:$a, node:$b)>;
def atomic_load_and_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_i64 node:$a, node:$b)>;
def atomic_load_and_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_i64 node:$a, node:$b)>;

1916
// ".and" instantiations, 32-bit (".b32").
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
               atomic_load_and_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_i32_gen, i32imm, imm>;
// ".and" instantiations, 64-bit (".b64"); all are predicated on hasSM<32>.
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
               atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
1933
1934// atom_or
1935
// Fragments wrapping atomic_load_or_{i32,i64}, one per checker class
// (global / shared / generic).  Operands $a and $b pass through unchanged.
def atomic_load_or_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i32 node:$a, node:$b)>;
def atomic_load_or_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i32 node:$a, node:$b)>;
def atomic_load_or_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_i32 node:$a, node:$b)>;
def atomic_load_or_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i64 node:$a, node:$b)>;
def atomic_load_or_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_i64 node:$a, node:$b)>;
def atomic_load_or_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_i64 node:$a, node:$b)>;

1948
// ".or" instantiations, 32-bit (".b32").  Note the declaration order here is
// global, generic, generic-as-global, shared.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
               atomic_load_or_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_i32_s, i32imm, imm>;
// ".or" instantiations, 64-bit (".b64"); all are predicated on hasSM<32>.
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
               atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>;
1965
1966// atom_xor
1967
// Fragments wrapping atomic_load_xor_{i32,i64}, one per checker class
// (global / shared / generic).  Operands $a and $b pass through unchanged.
def atomic_load_xor_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i32 node:$a, node:$b)>;
def atomic_load_xor_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i32 node:$a, node:$b)>;
def atomic_load_xor_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_i32 node:$a, node:$b)>;
def atomic_load_xor_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i64 node:$a, node:$b)>;
def atomic_load_xor_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_i64 node:$a, node:$b)>;
def atomic_load_xor_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_i64 node:$a, node:$b)>;

1980
// ".xor" instantiations, 32-bit (".b32").
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_i32_gen, i32imm, imm>;
// ".xor" instantiations, 64-bit (".b64"); all are predicated on hasSM<32>.
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
1997
1998// atom_cas
1999
// Three-operand fragments wrapping atomic_cmp_swap_{i32,i64}, one per checker
// class (global / shared / generic).  Operands $a, $b and $c pass through
// unchanged.
def atomic_cmp_swap_i32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_i64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;

2012
// ".cas" instantiations via F_ATOMIC_3, 32-bit (".b32").
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_i32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_i32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_i32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_i32_gen, i32imm>;
// ".cas" instantiations, 64-bit (".b64").  Unlike the 64-bit bitwise atomics,
// no extra predicate list is passed here.
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_i64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_i64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_i64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_i64_gen, i64imm>;
2029
2030// Support for scoped atomic operations.  Matches
2031// int_nvvm_atomic_{op}_{space}_{type}_{scope}
2032// and converts it into the appropriate instruction.
2033// NOTE: not all possible combinations are implemented
2034//  'space' is limited to generic as it's the only one needed to support CUDA.
2035//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common instruction shape shared by the two- and three-operand scoped
// atomics: a single $result output in `regclass`, the caller-supplied input
// dag, and a selection pattern that sets $result (typed as regT) from
// `Operands`.  `Preds` becomes the instruction's Requires<> list.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result),
              ins,
              AsmStr,
              [(set (regT regclass:$result), Operands)]>,
    Requires<Preds>;
2042
// Emits every addressing-mode variant of a two-operand scoped atomic:
// {16,32,64}-bit address register x {register, immediate} value operand.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-operand forms.  Each is given AddedComplexity = 1, whereas the
  // immediate forms further down keep the default.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int16Regs:$src, regclass:$b),
                    (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b),
                    (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b),
                    (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;

  // Immediate-operand forms.  TableGen cannot deduce operand types from an
  // Intrinsic (it can from an Instruction), so the immediate is pinned to
  // ImmTy with an explicit cast.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int16Regs:$src, ImmType:$b),
                    (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}
2072
// Emits every register/immediate permutation of $b and $c for a three-operand
// scoped atomic, for both 32- and 64-bit address registers.  AddedComplexity
// drops from 2 (both registers) through 1 (one immediate) to the default
// (both immediates).
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b register, $c register.
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, regclass:$c),
                    (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
  let AddedComplexity = 2 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, regclass:$c),
                    (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;

  // $b immediate, $c register.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;

  // $b register, $c immediate.
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                    (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
  let AddedComplexity = 1 in
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                    (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;

  // $b immediate, $c immediate (default complexity).
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
2107
// Builds the asm string and looks up the intrinsic by name for a two-operand
// scoped atomic, then forwards both to ATOM2P_impl.
//   asm:       "atom" [.<space>] [.<scope>] .<op>.<type> " \t$result, [$src], $b;"
//              (.<space> is omitted for "gen", .<scope> is omitted for "gpu")
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
//              (_<scope> is omitted only when ScopeStr is empty)
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
           !strconcat("atom",
                      !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                      !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                      ".", OpStr, ".", TypeStr,
                      " \t$result, [$src], $b;"),
           !cast<Intrinsic>(
             !strconcat("int_nvvm_atomic_", OpStr,
                        "_", SpaceStr, "_", IntTypeStr,
                        !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2N_impl: builds the asm string (with the
// extra $c operand) and the intrinsic name, then forwards to ATOM3P_impl.
//   asm:       "atom" [.<space>] [.<scope>] .<op>.<type> " \t$result, [$src], $b, $c;"
//              (.<space> is omitted for "gen", .<scope> is omitted for "gpu")
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
//              (_<scope> is omitted only when ScopeStr is empty)
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
           !strconcat("atom",
                      !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                      !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                      ".", OpStr, ".", TypeStr,
                      " \t$result, [$src], $b, $c;"),
           !cast<Intrinsic>(
             !strconcat("int_nvvm_atomic_", OpStr,
                        "_", SpaceStr, "_", IntTypeStr,
                        !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2137
// Address-space fan-out for two-operand scoped atomics.  Only the generic
// space ("gen") is instantiated at present.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Address-space fan-out for three-operand scoped atomics.  Only the generic
// space ("gen") is instantiated at present.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
2152
// Scope fan-out for two-operand atomics: instantiates the "cta" and "sys"
// scopes, appending hasAtomScope to the caller's predicate list for each.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The default ".gpu" scope is intentionally absent: the existing atomics
  // without an explicit scope already cover it.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regT, regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regT, regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
// Scope fan-out for three-operand atomics: instantiates the "cta" and "sys"
// scopes, appending hasAtomScope to the caller's predicate list for each.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // ".gpu"-scoped variants are not defined; they would behave the same as
  // the regular non-scoped atomics defined elsewhere.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regT, regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regT, regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
2178
// atom.add: integer (s32/u32/u64) and floating-point (bf16/f16/f32/f64)
// variants.  The second ATOM2S_impl argument is the type tag used in the
// intrinsic name ("i" or "f"); the third is the PTX type suffix.
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
  // 16-bit floating-point variants use Int16Regs and carry SM/PTX version
  // predicates.
  defm _bf16
    : ATOM2S_impl<OpStr, "f", "bf16", bf16, Int16Regs, bf16imm, fpimm, bf16,
                  [hasSM<90>, hasPTX<78>]>;
  defm _f16
    : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs, f16imm, fpimm, f16,
                  [hasSM<70>, hasPTX<63>]>;
  defm _f32
    : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                  []>;
  defm _f64
    : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
                  [hasAtomAddF64]>;
}
2193
// atom.{and,or,xor}: b32 and b64 variants; the 64-bit one is predicated on
// hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
                  [hasAtomBitwise64]>;
}
2200
// atom.exch: b32 and b64 variants, neither with extra predicates.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
2206
// atom.{min,max}: signed and unsigned 32/64-bit variants; the 64-bit ones are
// predicated on hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _s64
    : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
}
2216
// atom.{inc,dec}: only a u32 variant is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
}
2221
// atom.cas: three-operand b32 and b64 variants, neither with extra predicates.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32
    : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64, []>;
}
2227
// Scoped-atomic instantiations, one per PTX operation.  The string argument
// is the op name spliced into both the asm string and the intrinsic name.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
2238
2239//-----------------------------------
2240// Support for ldu on sm_20 or later
2241//-----------------------------------
2242
2243// Don't annotate ldu instructions as mayLoad, as they load from memory that is
2244// read-only in a kernel.
2245
2246// Scalar
2247
// Scalar "ldu.global" instructions, one per addressing form.  TyStr supplies
// the type suffix together with the operand text.  All forms require hasLDU
// and have an empty pattern list.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  // Address held in a 32-bit register.
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  // Address held in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  // imemAny address operand.
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  // MEMri address operand.
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ldu.global." # TyStr, []>,
            Requires<[hasLDU]>;
  // MEMri64 address operand.
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ldu.global." # TyStr, []>,
              Requires<[hasLDU]>;
}
2265
// Scalar ldu instantiations.  The u8 and u16 loads both use Int16Regs for the
// result.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
2272
2273// vector
2274
// Elementized two-element vector ldu: each vector lane is a separate output
// register ($dst1, $dst2).  One def per addressing form; all have an empty
// pattern list.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
2293
// Elementized four-element vector ldu: each vector lane is a separate output
// register ($dst1..$dst4).  One def per addressing form; all have an empty
// pattern list.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}
2311
// Two-element vector ldu instantiations.
defm INT_PTX_LDU_G_v2i8_ELE  : VLDU_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element vector ldu instantiations.  The f16 and f16x2 forms load
// untyped b16 / b32 lanes into integer registers.
defm INT_PTX_LDU_G_v4i8_ELE    : VLDU_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE   : VLDU_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE   : VLDU_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE   : VLDU_G_ELE_V4<
    "v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<
    "v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE   : VLDU_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2341
2342
2343//-----------------------------------
2344// Support for ldg on sm_35 or later
2345//-----------------------------------
2346
2347// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2348// non-coherent texture cache, and therefore the values read must be read-only
2349// during the lifetime of the kernel.
2350
// Scalar "ld.global.nc" instructions, one per addressing form.  TyStr supplies
// the type suffix together with the operand text.  All forms require hasLDG
// and have an empty pattern list.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  // Address held in a 32-bit register.
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // Address held in a 64-bit register.
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  // imemAny address operand.
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ld.global.nc." # TyStr, []>,
             Requires<[hasLDG]>;
  // MEMri address operand.
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ld.global.nc." # TyStr, []>,
            Requires<[hasLDG]>;
  // MEMri64 address operand.
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ld.global.nc." # TyStr, []>,
              Requires<[hasLDG]>;
}
2368
// Scalar ldg instantiations.  The u8 and u16 loads both use Int16Regs for the
// result.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2381
2382// vector
2383
// Elementized two-element vector ldg: each vector lane is a separate output
// register ($dst1, $dst2).  One def per addressing form; all have an empty
// pattern list.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
2402
// Elementized four-element vector ldg: each vector lane is a separate output
// register ($dst1..$dst4).  One def per addressing form; all have an empty
// pattern list.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}
2420
// Two-element vector ldg instantiations.
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper
// loads.
defm INT_PTX_LDG_G_v2i8_ELE  : VLDG_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
// Four-element vector ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE  : VLDG_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2442
2443
// "cvta.<space>" instructions matching intrinsic `Intrin`:
//   ""     32-bit source -> 32-bit result
//   _64    64-bit source -> 64-bit result
//   _6432  32-bit source -> 64-bit result; the source is first widened with
//          cvt.u64.u32 into a temporary.  Only enabled under `ShortPtr`.
multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta." # Str # ".u32 \t$result, $src;",
                     [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta." # Str # ".u64 \t$result, $src;",
                      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                        !strconcat("{{ .reg .b64 %tmp;\n\t",
                                   "  cvt.u64.u32 \t%tmp, $src;\n\t",
                                   "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                        [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
              Requires<[ShortPtr]>;
}
2458
// "cvta.to.<space>" instructions matching intrinsic `Intrin`:
//   ""     32-bit source -> 32-bit result
//   _64    64-bit source -> 64-bit result
//   _3264  64-bit source -> 32-bit result; the conversion is done at 64 bits
//          into a temporary and then narrowed with cvt.u32.u64.  Only enabled
//          under `ShortPtr`.
multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta.to." # Str # ".u32 \t$result, $src;",
                     [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta.to." # Str # ".u64 \t$result, $src;",
                      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                        !strconcat("{{ .reg .b64 %tmp;\n\t",
                                   "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                                   "  cvt.u32.u64 \t$result, %tmp; }}"),
                        [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
              Requires<[ShortPtr]>;
}
2473
// NG_TO_G instantiations, one per address-space name.  The third argument is
// the predicate attached to the mixed-width _6432 variant; "global" and
// "param" pass False for it.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
defm cvta_param
  : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>;
2479
// G_TO_NG instantiations, one per address-space name.  The third argument is
// the predicate attached to the mixed-width _3264 variant; "global" passes
// False for it.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
2484
// nvvm.ptr.gen.to.param
// Selected as a plain register move: mov.u32 for a 32-bit operand and
// mov.u64 for a 64-bit operand.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2496
2497
// nvvm.move intrinsics
// One mov instruction per nvvm.move intrinsic.  Integer moves use the
// untyped .b<N> form, floating-point moves use .f32/.f64, and the two
// pointer moves use .u32/.u64 depending on the operand register class.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2527
2528// @TODO: Are these actually needed, or will we always just see symbols
2529// copied to registers first?
2530/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2531                             "mov.u32 \t$r, $s;",
2532                             [(set Int32Regs:$r,
2533                             (int_nvvm_move_ptr texternalsym:$s))]>;
2534def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2535                             "mov.u64 \t$r, $s;",
2536                             [(set Int64Regs:$r,
2537                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2538
2539
// Fold a local->generic->local round trip applied to a MoveParam'ed symbol
// into a single move:
//
//   MoveParam        %r1, param
//   ptr_local_to_gen %r2, %r1
//   ptr_gen_to_local %r3, %r2
//   ->
//   mov %r1, param
//
// @TODO: Revisit this.  iPTRAny and iPTR contradict each other for the addr
// defs, so the move_sym instructions are not currently defined.  The ptr
// variants can be used instead and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2556
// 64-bit mov of an imem operand into a register.  The pattern list is empty,
// so this record is never chosen by pattern matching (presumably it is
// created explicitly elsewhere in the backend).
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
2560
2561//-----------------------------------
2562// Compiler Error Warn
2563// - Just ignore them in codegen
2564//-----------------------------------
2565
// Each record matches the intrinsic with a 32- or 64-bit register operand,
// produces no result, and prints nothing but a "//" comment line.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2578
2579
2580// isspacep
2581
// Emits "isspacep.<suffix>" for intrinsic Intr with a 32-bit (_32) and a
// 64-bit (_64) address operand.  Both write an Int1Regs result and both are
// guarded by Preds, which defaults to no predicates.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  // The two variants print the same text; only the operand class differs.
  defvar AsmText = !strconcat("isspacep.", suffix, "\t$d, $a;");

  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), AsmText,
                      [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), AsmText,
                      [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<Preds>;
}
2592
// ISSPACEP instantiations.  "const" is gated on hasPTX<31> and
// "shared::cluster" on hasPTX<78> together with hasSM<90>; the others carry
// no predicates.
defm isspace_const
  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global
  : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local
  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared
  : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;
2600
// Special register reads
// mov.b32 from a SpecialRegs operand into a 32-bit register.  The record has
// no pattern of its own; it is referenced from separate Pat definitions.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;", []>;
2605
// Map int_nvvm_read_ptx_sreg_envreg<i> onto a MOV_SPECIAL of ENVREG<i> for
// i = 0..31.  The intrinsic and register records are looked up by name.
foreach i = 0...31 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2638
2639
2640// rotate builtin support
2641
// int_nvvm_rotate_b32 with an immediate amount when hasHWROT32 holds:
// a shf.l.wrap.b32 that passes $src as both source operands.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;
2649
// int_nvvm_rotate_b32 with a register amount when hasHWROT32 holds:
// a shf.l.wrap.b32 that passes $src as both source operands.
def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;
2657
// int_nvvm_rotate_b32 when noHWROT32 holds.  The immediate form passes both
// $amt and SUB_FRM_32 of $amt to ROT32imm_sw (presumably the complementary
// shift count -- confirm against SUB_FRM_32); the register form defers to
// ROTL32reg_sw.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2665
// Extract one 32-bit word of a 64-bit register by unpacking it with mov.b64
// into {first, second}.  GET_LO_INT64 keeps the first element and
// GET_HI_INT64 the second; the other element goes to a scratch %dummy
// register declared inside the emitted block.
let hasSideEffects = false in {
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []>;

  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []>;
}
2681
// Build a 64-bit register from two 32-bit registers with a packing mov.b64;
// $lo is listed first and $hi second.
let hasSideEffects = false in
def PACK_TWO_INT32
  : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
              "mov.b64 \t$dst, {{$lo, $hi}};", []>;
2687
// int_nvvm_swap_lo_hi_b64: repack the source with its two 32-bit words
// exchanged (GET_HI_INT64 feeds the $lo slot, GET_LO_INT64 the $hi slot).
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src))>;
2691
// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
// no side effects.
//
// Left (shf.l.wrap) and right (shf.r.wrap) forms, each with an immediate and
// a register shift amount.  None has a pattern; they are used as building
// blocks in explicit Pat results.
let hasSideEffects = false in {
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
}
2719
// HW version of rotate 64
// The 64-bit source is split into its two 32-bit words, each result word is
// produced by one left funnel shift over the pair, and the two results are
// packed back together.  One pattern handles an immediate amount, the other
// a register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2736
2737
// int_nvvm_rotate_right_b64 when hasHWROT32 holds: same split / funnel-shift /
// repack scheme as the left rotate, but built from the shf.r.wrap
// instructions and with the word order of each shift reversed.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src),
                                Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src),
                                Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2753
// SW version of rotate 64
// Used when noHWROT32 holds.  The immediate forms both expand to ROT64imm_sw;
// the right rotate passes $amt and SUB_FRM_64 of $amt in the opposite order
// from the left rotate.  The register forms defer to ROTL64reg_sw and
// ROTR64reg_sw.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2767
2768
2769//-----------------------------------
2770// Texture Intrinsics
2771//-----------------------------------
2772
2773// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2774// also defined in NVPTXReplaceImageHandles.cpp
2775
2776// texmode_independent
2777let IsTex = true, IsTexModeUnified = false in {
2778// Texture fetch instructions using handles
2779
// Instruction shape shared by the TEX_1D variants: four outtype results
// ($r, $g, $b, $a) and, after the operands supplied in texsamp, a single
// intype coordinate $x.  No selection pattern is attached.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];"),
                []>;
2787
// Expands to the four $t/$s operand combinations of TEX_1D_base.  In the
// suffix the first letter describes $t and the second $s: R is an Int64Regs
// register, I an i64imm immediate.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_base<inst, outtype, intype, !con(TImm, SImm)>;
}

// Result type / coordinate type combinations.
defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2805
// Instruction shape shared by the TEX_1D_LEVEL variants: four outtype
// results and, after the texsamp operands, an intype coordinate $x plus an
// intype $lod operand printed after the address bracket.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;"),
                []>;
2813
// Expands to the four $t/$s operand combinations of TEX_1D_LEVEL_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2832
// Instruction shape shared by the TEX_1D_GRAD variants: four outtype results
// and, after the texsamp operands, an intype coordinate $x followed by the
// $gradx and $grady operands, each printed as its own brace group.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],",
                           " \\{$gradx\\}, \\{$grady\\};"),
                []>;
2841
// Expands to the four $t/$s operand combinations of TEX_1D_GRAD_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_GRAD_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2860
// Instruction shape shared by the TEX_1D_ARRAY variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l printed
// ahead of the intype coordinate $x inside the coordinate braces.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];"),
                []>;
2868
// Expands to the four $t/$s operand combinations of TEX_1D_ARRAY_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_ARRAY_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2893
// Instruction shape shared by the TEX_1D_ARRAY_LEVEL variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l, an
// intype coordinate $x and an intype $lod printed after the address bracket.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x\\}], $lod;"),
                []>;
2902
// Expands to the four $t/$s operand combinations of TEX_1D_ARRAY_LEVEL_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2921
// Instruction shape shared by the TEX_1D_ARRAY_GRAD variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l, an
// intype coordinate $x and the $gradx / $grady operands, each printed as its
// own brace group.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x,
                          intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],",
                           " \\{$gradx\\}, \\{$grady\\};"),
                []>;
2931
// Expands to the four $t/$s operand combinations of TEX_1D_ARRAY_GRAD_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2950
// Instruction shape shared by the TEX_2D variants: four outtype results and,
// after the texsamp operands, two intype coordinates $x and $y.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];"),
                []>;
2958
// Expands to the four $t/$s operand combinations of TEX_2D_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_2D_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_2D_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_2D_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_2D_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2973
// Instruction shape shared by the TEX_2D_LEVEL variants: four outtype
// results and, after the texsamp operands, two intype coordinates $x and $y
// plus an intype $lod printed after the address bracket.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$x, $y\\}], $lod;"),
                []>;
2982
// Expands to the four $t/$s operand combinations of TEX_2D_LEVEL_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3001
// Instruction shape shared by the TEX_2D_GRAD variants: four outtype results
// and, after the texsamp operands, two intype coordinates $x and $y followed
// by two two-element groups, {$gradx0, $gradx1} and {$grady0, $grady1}.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],",
                           " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                []>;
3012
// Expands to the four $t/$s operand combinations of TEX_2D_GRAD_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_2D_GRAD_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3031
// Instruction shape shared by the TEX_2D_ARRAY variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l and two
// intype coordinates $x and $y.  The printed coordinate vector has four
// slots and repeats $y in the last one (presumably as padding to the vector
// width the instruction expects).
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x, $y, $y\\}];"),
                []>;
3040
// Expands to the four $t/$s operand combinations of TEX_2D_ARRAY_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3065
// Instruction shape shared by the TEX_2D_ARRAY_LEVEL variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l, two
// intype coordinates $x and $y, and an intype $lod printed after the address
// bracket.  The printed coordinate vector repeats $y in its fourth slot.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;"),
                []>;
3075
// Expands to the four $t/$s operand combinations of TEX_2D_ARRAY_LEVEL_base
// (R = Int64Regs register, I = i64imm immediate; first letter is $t).
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  defvar TReg = (ins Int64Regs:$t);
  defvar TImm = (ins i64imm:$t);
  defvar SReg = (ins Int64Regs:$s);
  defvar SImm = (ins i64imm:$s);

  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TReg, SReg)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TReg, SImm)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TImm, SReg)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, !con(TImm, SImm)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3094
// Instruction shape shared by the TEX_2D_ARRAY_GRAD variants: four outtype
// results and, after the texsamp operands, an Int32Regs operand $l, two
// intype coordinates $x and $y, and two two-element groups,
// {$gradx0, $gradx1} and {$grady0, $grady1}.  The printed coordinate vector
// repeats $y in its fourth slot.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x, $y, $y\\}],",
                           " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                []>;
3106
// Emits four variants of TEX_2D_ARRAY_GRAD_base. The two suffix letters give
// the operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3118
// "tex.grad.a2d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_2D_ARRAY_F32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3125
// Instruction shape shared by the TEX_3D_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $z.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// The pattern list is empty.
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$x, $y, $z, $z\\}];",
        []>;
3134
// Emits four variants of TEX_3D_base. The two suffix letters give the operand
// kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3145
// "tex.3d" instantiations, named <result type>_<input type>. The S32 and U32
// result forms both use Int32Regs.
defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3152
// Instruction shape shared by the TEX_3D_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp,
             (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
        []>;
3162
// Emits four variants of TEX_3D_LEVEL_base. The two suffix letters give the
// operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3174
// "tex.level.3d" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_3D_F32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3181
// Instruction shape shared by the TEX_3D_*_GRAD definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient operands.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// The last element of each printed vector is repeated ($z, $gradx2, $grady2),
// so every vector has four elements (presumably the required operand form --
// confirm against the PTX ISA `tex` text). The pattern list is empty.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp,
             (ins intype:$x, intype:$y, intype:$z,
                  intype:$gradx0, intype:$gradx1, intype:$gradx2,
                  intype:$grady0, intype:$grady1, intype:$grady2)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$x, $y, $z, $z\\}],"
             # " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             # " \\{$grady0, $grady1, $grady2, $grady2\\};",
        []>;
3195
// Emits four variants of TEX_3D_GRAD_base. The two suffix letters give the
// operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3207
// "tex.grad.3d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_3D_F32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3214
// Instruction shape shared by the TEX_CUBE_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $z.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// The pattern list is empty.
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$x, $y, $z, $z\\}];",
        []>;
3223
// Emits four variants of TEX_CUBE_base. The two suffix letters give the
// operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3234
// "tex.cube" instantiations, named <result type>_<input type>. The S32 and
// U32 forms both use Int32Regs for their results.
defm TEX_CUBE_F32_F32 : TEX_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 : TEX_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 : TEX_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3241
// Instruction shape shared by the TEX_CUBE_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp,
             (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
        []>;
3251
// Emits four variants of TEX_CUBE_LEVEL_base. The two suffix letters give the
// operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3263
// "tex.level.cube" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_CUBE_F32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3270
// Instruction shape shared by the TEX_CUBE_ARRAY_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $z.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $l is always Int32Regs and is printed first inside the coordinate vector,
// followed by $x, $y, $z. The pattern list is empty.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp,
             (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$l, $x, $y, $z\\}];",
        []>;
3280
// Emits four variants of TEX_CUBE_ARRAY_base. The two suffix letters give the
// operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3292
// "tex.acube" instantiations, named <result type>_<input type>. The S32 and
// U32 forms both use Int32Regs for their results.
defm TEX_CUBE_ARRAY_F32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3299
// Instruction shape shared by the TEX_CUBE_ARRAY_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// $l is always Int32Regs and is printed first inside the coordinate vector;
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(texsamp,
             (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                  intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
        []>;
3309
// Emits four variants of TEX_CUBE_ARRAY_LEVEL_base. The two suffix letters
// give the operand kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3321
// "tex.level.acube" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3331
// Instruction shape shared by the TLD4_*_2D_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $v0, $v1, $v2, $v3.
//   intype  - register class of $x and $y.
//   texsamp - (ins ...) dag prepended with !con; it has to declare the $t and
//             $s operands that the assembly string prints.
// The coordinate vector is printed as {$x, $y}. The pattern list is empty.
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
        (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
        !con(texsamp, (ins intype:$x, intype:$y)),
        inst # " \t\\{$v0, $v1, $v2, $v3\\},"
             # " [$t, $s, \\{$x, $y\\}];",
        []>;
3339
// Emits four variants of TLD4_2D_base. The two suffix letters give the operand
// kind of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3350
// "tld4.<c>.2d" instantiations. The R/G/B/A letter in each name matches the
// .r/.g/.b/.a component of its mnemonic; the trailing pair is
// <result type>_<input type>.

// Float32Regs results.
defm TLD4_R_2D_F32_F32 : TLD4_2D<
    "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32 : TLD4_2D<
    "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32 : TLD4_2D<
    "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32 : TLD4_2D<
    "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// .s32 results in Int32Regs.
defm TLD4_R_2D_S32_F32 : TLD4_2D<
    "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32 : TLD4_2D<
    "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32 : TLD4_2D<
    "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32 : TLD4_2D<
    "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// .u32 results in Int32Regs.
defm TLD4_R_2D_U32_F32 : TLD4_2D<
    "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32 : TLD4_2D<
    "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32 : TLD4_2D<
    "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32 : TLD4_2D<
    "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3377
3378}
3379
3380
3381// texmode_unified
3382let IsTex = true, IsTexModeUnified = true in {
3383// Texture fetch instructions using handles
3384
// Instruction shape shared by the TEX_UNIFIED_1D_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// Only $t appears in the bracketed address; there is no $s operand.
// The pattern list is empty.
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x\\}];",
        []>;
3392
// Emits two variants of TEX_UNIFIED_1D_base, selected by the kind of $t:
// _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3398
// "tex.1d" instantiations of TEX_UNIFIED_1D, named <result type>_<input type>.
// The S32 and U32 result forms both use Int32Regs.
defm TEX_UNIFIED_1D_F32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32 : TEX_UNIFIED_1D<
    "tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3411
// Instruction shape shared by the TEX_UNIFIED_1D_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x and $lod.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x\\}], $lod;",
        []>;
3419
// Emits two variants of TEX_UNIFIED_1D_LEVEL_base, selected by the kind of
// $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3425
// "tex.level.1d" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_1D_F32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL : TEX_UNIFIED_1D_LEVEL<
    "tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3432
// Instruction shape shared by the TEX_UNIFIED_1D_*_GRAD definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $gradx and $grady.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $gradx and $grady are each printed as a one-element braced vector.
// The pattern list is empty.
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
        []>;
3441
// Emits two variants of TEX_UNIFIED_1D_GRAD_base, selected by the kind of $t:
// _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3447
// "tex.grad.1d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_1D_F32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD : TEX_UNIFIED_1D_GRAD<
    "tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3454
// Instruction shape shared by the TEX_UNIFIED_1D_ARRAY_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and is printed ahead of $x in the coordinate vector.
// The pattern list is empty.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$l, $x\\}];",
        []>;
3462
// Emits two variants of TEX_UNIFIED_1D_ARRAY_base, selected by the kind of
// $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3468
// "tex.a1d" instantiations of TEX_UNIFIED_1D_ARRAY, named
// <result type>_<input type>. The S32 and U32 result forms both use Int32Regs.
defm TEX_UNIFIED_1D_ARRAY_F32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32 : TEX_UNIFIED_1D_ARRAY<
    "tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3481
// Instruction shape shared by the TEX_UNIFIED_1D_ARRAY_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x and $lod.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and is printed ahead of $x in the coordinate vector;
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$l, $x\\}], $lod;",
        []>;
3489
// Emits two variants of TEX_UNIFIED_1D_ARRAY_LEVEL_base, selected by the kind
// of $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
}
3497
// "tex.level.a1d" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3507
// Instruction shape shared by the TEX_UNIFIED_1D_ARRAY_*_GRAD definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $gradx and $grady.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and is printed ahead of $x in the coordinate vector.
// NOTE(review): the second string fragment starts with two spaces, unlike the
// neighbouring classes; it is kept as-is so the printed text does not change.
// The pattern list is empty.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
        []>;
3517
// Emits two variants of TEX_UNIFIED_1D_ARRAY_GRAD_base, selected by the kind
// of $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}
3525
// "tex.grad.a1d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3535
// Instruction shape shared by the TEX_UNIFIED_2D_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x and $y.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// The coordinate vector is printed as {$x, $y}. The pattern list is empty.
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x, $y\\}];",
        []>;
3543
// Emits two variants of TEX_UNIFIED_2D_base, selected by the kind of $t:
// _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3549
// "tex.2d" instantiations of TEX_UNIFIED_2D, named <result type>_<input type>.
// The S32 and U32 result forms both use Int32Regs.
defm TEX_UNIFIED_2D_F32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32 : TEX_UNIFIED_2D<
    "tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3562
// Instruction shape shared by the TEX_UNIFIED_2D_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $lod.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x, $y\\}], $lod;",
        []>;
3570
// Emits two variants of TEX_UNIFIED_2D_LEVEL_base, selected by the kind of
// $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3576
// "tex.level.2d" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_2D_F32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL : TEX_UNIFIED_2D_LEVEL<
    "tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3583
// Instruction shape shared by the TEX_UNIFIED_2D_*_GRAD definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// The gradient operands are printed as {$gradx0, $gradx1} followed by
// {$grady0, $grady1}. The pattern list is empty.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x, $y\\}],"
             # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
        []>;
// Emits two variants of TEX_UNIFIED_2D_GRAD_base, selected by the kind of $t:
// _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3599
// "tex.grad.2d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_2D_F32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD : TEX_UNIFIED_2D_GRAD<
    "tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3606
// Instruction shape shared by the TEX_UNIFIED_2D_ARRAY_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x and $y.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and leads the coordinate vector; $y is printed twice
// so the vector has four elements (presumably the required operand form --
// confirm against the PTX ISA `tex` text). The pattern list is empty.
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$l, $x, $y, $y\\}];",
        []>;
// Emits two variants of TEX_UNIFIED_2D_ARRAY_base, selected by the kind of
// $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3619
// "tex.a2d" instantiations of TEX_UNIFIED_2D_ARRAY, named
// <result type>_<input type>. The S32 and U32 result forms both use Int32Regs.
defm TEX_UNIFIED_2D_ARRAY_F32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32 : TEX_UNIFIED_2D_ARRAY<
    "tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3632
// Instruction shape shared by the TEX_UNIFIED_2D_ARRAY_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $lod.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and leads the coordinate vector; $y is printed twice
// so the vector has four elements (presumably the required operand form --
// confirm against the PTX ISA `tex` text).
// NOTE(review): the second string fragment starts with two spaces, unlike the
// neighbouring classes; it is kept as-is so the printed text does not change.
// The pattern list is empty.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
        []>;
// Emits two variants of TEX_UNIFIED_2D_ARRAY_LEVEL_base, selected by the kind
// of $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
}
3649
// "tex.level.a2d" instantiations, named <result type>_<input type>_LEVEL.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3659
// Instruction shape shared by the TEX_UNIFIED_2D_ARRAY_*_GRAD definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $l is always Int32Regs and leads the coordinate vector; $y is printed twice
// so the vector has four elements (presumably the required operand form --
// confirm against the PTX ISA `tex` text). The gradient operands are printed
// as {$gradx0, $gradx1} followed by {$grady0, $grady1}.
// The pattern list is empty.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins Int32Regs:$l, intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$l, $x, $y, $y\\}],"
             # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
        []>;
// Emits two variants of TEX_UNIFIED_2D_ARRAY_GRAD_base, selected by the kind
// of $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}
3677
// "tex.grad.a2d" instantiations, named <result type>_<input type>_GRAD.
// The S32 and U32 forms both use Int32Regs for their results.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : TEX_UNIFIED_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3687
// Instruction shape shared by the TEX_UNIFIED_3D_* definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and $z.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// The pattern list is empty.
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex, (ins intype:$x, intype:$y, intype:$z)),
        inst # " \t\\{$r, $g, $b, $a\\},"
             # " [$t, \\{$x, $y, $z, $z\\}];",
        []>;
// Emits two variants of TEX_UNIFIED_3D_base, selected by the kind of $t:
// _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3700
// "tex.3d" instantiations of TEX_UNIFIED_3D, named <result type>_<input type>.
// The S32 and U32 result forms both use Int32Regs.
defm TEX_UNIFIED_3D_F32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32 : TEX_UNIFIED_3D<
    "tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3713
// Instruction shape shared by the TEX_UNIFIED_3D_*_LEVEL definitions.
//   inst    - mnemonic text that starts the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   tex     - (ins ...) dag prepended with !con; it has to declare the $t
//             operand that the assembly string prints.
// $z is printed twice, so the coordinate vector has four elements (presumably
// the required operand form -- confirm against the PTX ISA `tex` text).
// $lod is printed after the bracketed address. The pattern list is empty.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(tex,
             (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
        inst # " \t\\{$r, $g, $b, $a\\}," # " [$t, \\{$x, $y, $z, $z\\}], $lod;",
        []>;
// Emits two variants of TEX_UNIFIED_3D_LEVEL_base, selected by the kind of
// $t: _R takes Int64Regs, _I takes i64imm.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3727
// tex.level.3d.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_3D_F32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL : TEX_UNIFIED_3D_LEVEL<
    "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3734
// Common shape of the TEX_UNIFIED_3D_GRAD instructions: the operands of the
// plain 3D form plus six gradient operands ($gradx0..2, $grady0..2) of class
// `intype`.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates and gradients.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// All three printed vectors have four elements; in each one the third
// operand ($z, $gradx2, $grady2) is repeated in the last slot.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z,
                     intype:$gradx0, intype:$gradx1, intype:$gradx2,
                     intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
// Instantiates TEX_UNIFIED_3D_GRAD_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_3D_GRAD<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3752
// tex.grad.3d.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_3D_F32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD : TEX_UNIFIED_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3759
// Common shape of the TEX_UNIFIED_CUBE instructions.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// The coordinate vector is printed with four elements, with $z repeated in
// the last slot.
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}];",
      []>;
// Instantiates TEX_UNIFIED_CUBE_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE<string inst,
                            NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3772
// tex.cube.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_CUBE_F32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32 : TEX_UNIFIED_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3779
// Common shape of the TEX_UNIFIED_CUBE_LEVEL instructions: the operands of
// the plain cube form plus a trailing $lod operand of class `intype`, which
// is printed after the bracketed address.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// $z is repeated in the fourth slot of the printed coordinate vector.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
// Instantiates TEX_UNIFIED_CUBE_LEVEL_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst,
                                  NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3795
// tex.level.cube.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL : TEX_UNIFIED_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3805
// Common shape of the TEX_UNIFIED_CUBE_ARRAY instructions.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// $l is always an Int32Regs operand, independent of `intype`, and is printed
// as the first element of the address vector, ahead of $x, $y, $z.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $z\\}];",
      []>;
// Instantiates TEX_UNIFIED_CUBE_ARRAY_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst,
                                  NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3820
// tex.acube.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 : TEX_UNIFIED_CUBE_ARRAY<
    "tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3827
// Common shape of the TEX_UNIFIED_CUBE_ARRAY_LEVEL instructions: the operands
// of the cube-array form plus a trailing $lod operand of class `intype`,
// which is printed after the bracketed address.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and $lod.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// $l is always an Int32Regs operand and is printed first in the address
// vector.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l,
                     intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
// Instantiates TEX_UNIFIED_CUBE_ARRAY_LEVEL_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst,
                                        NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3844
// tex.level.acube.v4 instantiations; only Float32Regs coordinates are
// defined.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : TEX_UNIFIED_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3854
// Common shape of the TEX_UNIFIED_CUBE_GRAD instructions: the operands of the
// plain cube form plus six gradient operands ($gradx0..2, $grady0..2) of
// class `intype`.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates and gradients.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// All three printed vectors have four elements; in each one the third
// operand ($z, $gradx2, $grady2) is repeated in the last slot.
class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
                                 NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$y, intype:$z,
                     intype:$gradx0, intype:$gradx1, intype:$gradx2,
                     intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x, $y, $z, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
3867
// Instantiates TEX_UNIFIED_CUBE_GRAD_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_GRAD<string inst,
                                 NVPTXRegClass outtype,
                                 NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3873
// tex.grad.cube.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_CUBE_F32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD : TEX_UNIFIED_CUBE_GRAD<
    "tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3880
// Common shape of the TEX_UNIFIED_CUBE_ARRAY_GRAD instructions: the operands
// of the cube-array form plus six gradient operands ($gradx0..2, $grady0..2)
// of class `intype`.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates and gradients.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// $l is always an Int32Regs operand and is printed first in the address
// vector.  The two gradient vectors are printed with four elements, with
// $gradx2 / $grady2 repeated in the last slot.
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                     intype:$gradx0, intype:$gradx1, intype:$gradx2,
                     intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x, $y, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
// Instantiates TEX_UNIFIED_CUBE_ARRAY_GRAD_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst,
                                       NVPTXRegClass outtype,
                                       NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3900
// tex.grad.acube.v4 instantiations; only Float32Regs coordinates are defined.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD : TEX_UNIFIED_CUBE_ARRAY_GRAD<
    "tex.grad.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3910
// Common shape of the TLD4_UNIFIED_*_2D instructions.
//
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of the coordinates $x, $y.
//   tex     - (ins ...) dag prepended with !con; expected to declare $t.
//
// Unlike the 3D/cube forms, the coordinate vector is printed with exactly
// the two operands $x and $y.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(tex, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\},"
             " [$t, \\{$x, $y\\}];",
      []>;
// Instantiates TLD4_UNIFIED_2D_base twice:
//   _R - $t is an Int64Regs operand.
//   _I - $t is an i64imm operand.
multiclass TLD4_UNIFIED_2D<string inst,
                           NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R
    : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3923
// tld4.{r,g,b,a}.2d.v4 instantiations.  The letter after TLD4_UNIFIED_ in
// the record name matches the .r/.g/.b/.a selector in the mnemonic.  All
// forms take Float32Regs coordinates.

// f32 results.
defm TLD4_UNIFIED_R_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results.
defm TLD4_UNIFIED_R_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results.
defm TLD4_UNIFIED_R_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32 : TLD4_UNIFIED_2D<
    "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3950
3951}
3952
3953
3954
3955//=== Surface load instructions
3956
// Surface loads that produce a single result register ($r).
//
// IsSuld is written as the integer 1 rather than `true`: the sibling scopes
// holding the two- and four-result surface loads set the same field to 2 and
// 3, so the field is a small integer code rather than a boolean flag.
// TableGen treats `true` as the integer 1, so the generated records are
// unchanged.
//
// In every class below `surf` is an (ins ...) dag prepended to the coordinate
// operands with !con; it is expected to declare the $s operand used by the
// assembly string.  Each multiclass defines two variants:
//   _R - $s is an Int64Regs operand.
//   _I - $s is an i64imm operand.
// Both the b8 and the b16 mnemonics are instantiated with Int16Regs results.
let IsSuld = 1 in {

// 1D: a single Int32Regs coordinate $x.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x)),
      inst # " \\{$r\\},"
             " [$s, \\{$x\\}];",
      []>;
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_I8_CLAMP
  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP
  : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP
  : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP
  : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP
  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP
  : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP
  : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP
  : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO
  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO
  : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO
  : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO
  : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// 1D array: $l is printed first, followed by the coordinate $x.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
      inst # " \\{$r\\},"
             " [$s, \\{$l, $x\\}];",
      []>;
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b64.zero", Int64Regs>;

// 2D: coordinates $x, $y.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r\\},"
             " [$s, \\{$x, $y\\}];",
      []>;
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_I8_CLAMP
  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP
  : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP
  : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP
  : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP
  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP
  : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP
  : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP
  : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO
  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO
  : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO
  : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO
  : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// 2D array: $l is printed first; the address vector has four elements, with
// $y repeated in the last slot.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r\\},"
             " [$s, \\{$l, $x, $y, $y\\}];",
      []>;
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_I8_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_I8_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_I8_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// 3D: coordinates $x, $y, $z; the address vector has four elements, with $z
// repeated in the last slot.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      inst # " \\{$r\\},"
             " [$s, \\{$x, $y, $z, $z\\}];",
      []>;
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_I8_CLAMP
  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP
  : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP
  : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP
  : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP
  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP
  : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP
  : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP
  : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO
  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO
  : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO
  : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO
  : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
4096
// Surface loads that produce two result registers ($r, $g); every record in
// this scope gets IsSuld = 2.
//
// In every class below `surf` is an (ins ...) dag prepended to the coordinate
// operands with !con; it is expected to declare the $s operand used by the
// assembly string.  Each multiclass defines two variants:
//   _R - $s is an Int64Regs operand.
//   _I - $s is an i64imm operand.
// Both the b8 and the b16 mnemonics are instantiated with Int16Regs results.
let IsSuld = 2 in {

// 1D: a single Int32Regs coordinate $x.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x)),
      inst # " \\{$r, $g\\},"
             " [$s, \\{$x\\}];",
      []>;
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V2I8_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// 1D array: $l is printed first, followed by the coordinate $x.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
      inst # " \\{$r, $g\\},"
             " [$s, \\{$l, $x\\}];",
      []>;
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.zero", Int64Regs>;

// 2D: coordinates $x, $y.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g\\},"
             " [$s, \\{$x, $y\\}];",
      []>;
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b64.zero", Int64Regs>;

// 2D array: $l is printed first; the address vector has four elements, with
// $y repeated in the last slot.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
      inst # " \\{$r, $g\\},"
             " [$s, \\{$l, $x, $y, $y\\}];",
      []>;
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.zero", Int64Regs>;

// 3D: coordinates $x, $y, $z; the address vector has four elements, with $z
// repeated in the last slot.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
      inst # " \\{$r, $g\\},"
             " [$s, \\{$x, $y, $z, $z\\}];",
      []>;
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

}
4261
4262let IsSuld = 3 in {
4263
4264class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4265    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4266                !con(surf, (ins Int32Regs:$x)),
4267                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4268                []>;
4269multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4270  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4271  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
4272}
4273
4274defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4275defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4276defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
4277
4278defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4279defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4280defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
4281
4282defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4283defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4284defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
4285
4286class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4287    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4288                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4289                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
4290                []>;
4291multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4292  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4293  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
4294}
4295
4296defm SULD_1D_ARRAY_V4I8_CLAMP
4297  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4298defm SULD_1D_ARRAY_V4I16_CLAMP
4299  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4300defm SULD_1D_ARRAY_V4I32_CLAMP
4301  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
4302
4303defm SULD_1D_ARRAY_V4I8_TRAP
4304  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4305defm SULD_1D_ARRAY_V4I16_TRAP
4306  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4307defm SULD_1D_ARRAY_V4I32_TRAP
4308  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
4309
4310defm SULD_1D_ARRAY_V4I8_ZERO
4311  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4312defm SULD_1D_ARRAY_V4I16_ZERO
4313  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4314defm SULD_1D_ARRAY_V4I32_ZERO
4315  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
4316
4317class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4318    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4319                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4320                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4321                []>;
4322multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4323  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4324  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
4325}
4326
4327defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4328defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4329defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
4330
4331defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4332defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4333defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
4334
4335defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4336defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4337defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
4338
// Base record for the `suld.b.a2d.v4.*` instructions.
//   outs : four results $r, $g, $b, $a, all of register class `outtype`
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $l, $x, $y
//   asm  : `inst` followed by the result list and "[$s, {$l, $x, $y, $y}]"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $y is printed twice, giving a four-element coordinate list;
// presumably padding for the a2d geometry - confirm against the PTX ISA.
4339class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4340    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4341                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4342                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
4343                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4344multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4345  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4346  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
4347}
4348
// Instantiations for b8/b16/b32 with the .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs results.
4349defm SULD_2D_ARRAY_V4I8_CLAMP
4350  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4351defm SULD_2D_ARRAY_V4I16_CLAMP
4352  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4353defm SULD_2D_ARRAY_V4I32_CLAMP
4354  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4355
4356defm SULD_2D_ARRAY_V4I8_TRAP
4357  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4358defm SULD_2D_ARRAY_V4I16_TRAP
4359  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4360defm SULD_2D_ARRAY_V4I32_TRAP
4361  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4362
4363defm SULD_2D_ARRAY_V4I8_ZERO
4364  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4365defm SULD_2D_ARRAY_V4I16_ZERO
4366  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4367defm SULD_2D_ARRAY_V4I32_ZERO
4368  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
4369
// Base record for the `suld.b.3d.v4.*` instructions.
//   outs : four results $r, $g, $b, $a, all of register class `outtype`
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x, $y, $z
//   asm  : `inst` followed by the result list and "[$s, {$x, $y, $z, $z}]"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $z is printed twice, giving a four-element coordinate list;
// presumably padding for the 3d geometry - confirm against the PTX ISA.
4370class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4371    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4372                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4373                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
4374                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4375multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4376  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4377  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
4378}
4379
// Instantiations for b8/b16/b32 with the .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs results.
4380defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4381defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4382defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4383
4384defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4385defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4386defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4387
4388defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4389defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4390defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4391
4392}
4393
4394//-----------------------------------
4395// Texture Query Intrinsics
4396//-----------------------------------
4397
// Shared shape of every `txq.<query>.b32` instruction: a single Int32Regs
// result $d and a handle operand $a, whose operand kind is chosen by `handle`.
// The pattern list is empty; selection patterns are attached separately.
class TXQ_QUERY_base<string query, dag handle>
    : NVPTXInst<(outs Int32Regs:$d), handle,
                "txq." # query # ".b32 \t$d, [$a];",
                []>;

// Emits the two handle forms of one query:
//   _R : handle $a in an Int64Regs register
//   _I : handle $a as an i64imm immediate
multiclass TXQ_QUERY<string query> {
  def _R : TXQ_QUERY_base<query, (ins Int64Regs:$a)>;
  def _I : TXQ_QUERY_base<query, (ins i64imm:$a)>;
}

// Every generated record has IsSurfTexQuery set.
let IsSurfTexQuery = true in {
  defm TXQ_CHANNEL_ORDER     : TXQ_QUERY<"channel_order">;
  defm TXQ_CHANNEL_DATA_TYPE : TXQ_QUERY<"channel_data_type">;
  defm TXQ_WIDTH             : TXQ_QUERY<"width">;
  defm TXQ_HEIGHT            : TXQ_QUERY<"height">;
  defm TXQ_DEPTH             : TXQ_QUERY<"depth">;
  defm TXQ_ARRAY_SIZE        : TXQ_QUERY<"array_size">;
  defm TXQ_NUM_SAMPLES       : TXQ_QUERY<"num_samples">;
  defm TXQ_NUM_MIPMAP_LEVELS : TXQ_QUERY<"num_mipmap_levels">;
}
4464
// Selects a texture-query intrinsic whose handle is an Int64Regs value to the
// register-handle (_R) form of the matching TXQ instruction.
class TXQ_R_Pat<Intrinsic query, NVPTXInst inst>
    : Pat<(query Int64Regs:$a), (inst Int64Regs:$a)>;

def : TXQ_R_Pat<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER_R>;
def : TXQ_R_Pat<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE_R>;
def : TXQ_R_Pat<int_nvvm_txq_width,             TXQ_WIDTH_R>;
def : TXQ_R_Pat<int_nvvm_txq_height,            TXQ_HEIGHT_R>;
def : TXQ_R_Pat<int_nvvm_txq_depth,             TXQ_DEPTH_R>;
def : TXQ_R_Pat<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE_R>;
def : TXQ_R_Pat<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES_R>;
def : TXQ_R_Pat<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS_R>;
4481
4482
4483//-----------------------------------
4484// Surface Query Intrinsics
4485//-----------------------------------
4486
// Shared shape of every `suq.<query>.b32` instruction: a single Int32Regs
// result $d and a handle operand $a, whose operand kind is chosen by `handle`.
// The pattern list is empty; selection patterns are attached separately.
class SUQ_QUERY_base<string query, dag handle>
    : NVPTXInst<(outs Int32Regs:$d), handle,
                "suq." # query # ".b32 \t$d, [$a];",
                []>;

// Emits the two handle forms of one query:
//   _R : handle $a in an Int64Regs register
//   _I : handle $a as an i64imm immediate
multiclass SUQ_QUERY<string query> {
  def _R : SUQ_QUERY_base<query, (ins Int64Regs:$a)>;
  def _I : SUQ_QUERY_base<query, (ins i64imm:$a)>;
}

// Every generated record has IsSurfTexQuery set.
let IsSurfTexQuery = true in {
  defm SUQ_CHANNEL_ORDER     : SUQ_QUERY<"channel_order">;
  defm SUQ_CHANNEL_DATA_TYPE : SUQ_QUERY<"channel_data_type">;
  defm SUQ_WIDTH             : SUQ_QUERY<"width">;
  defm SUQ_HEIGHT            : SUQ_QUERY<"height">;
  defm SUQ_DEPTH             : SUQ_QUERY<"depth">;
  defm SUQ_ARRAY_SIZE        : SUQ_QUERY<"array_size">;
}
4537
// Selects a surface-query intrinsic whose handle is an Int64Regs value to the
// register-handle (_R) form of the matching SUQ instruction.
class SUQ_R_Pat<Intrinsic query, NVPTXInst inst>
    : Pat<(query Int64Regs:$a), (inst Int64Regs:$a)>;

def : SUQ_R_Pat<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER_R>;
def : SUQ_R_Pat<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE_R>;
def : SUQ_R_Pat<int_nvvm_suq_width,             SUQ_WIDTH_R>;
def : SUQ_R_Pat<int_nvvm_suq_height,            SUQ_HEIGHT_R>;
def : SUQ_R_Pat<int_nvvm_suq_depth,             SUQ_DEPTH_R>;
def : SUQ_R_Pat<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE_R>;
4550
4551
4552//===- Handle Query -------------------------------------------------------===//
4553
// TODO: These intrinsics are not yet finalized, pending PTX ISA design work

// Common shape of the `istypep.<kind>` instructions: an Int1Regs result $d
// computed from a handle $a held in Int64Regs, selected directly from the
// intrinsic `intr`.
class ISTYPEP_base<string kind, Intrinsic intr>
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep." # kind # " \t$d, $a;",
                [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER : ISTYPEP_base<"samplerref", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE : ISTYPEP_base<"surfref",    int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE : ISTYPEP_base<"texref",     int_nvvm_istypep_texture>;
4567
4568//===- Surface Stores -----------------------------------------------------===//
4569
4570let IsSust = true in {
4571
// Base record for scalar 1D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and the
//          value $r of register class `intype`
//   asm  : `inst` followed by "[$s, {$x}], {$r}"
// The pattern list is empty, so no selection pattern is attached here.
4572class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4573    : NVPTXInst<(outs),
4574                !con(surf, (ins Int32Regs:$x, intype:$r)),
4575                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4576                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4577multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4578  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4579  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
4580}
4581
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored value.
4582defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4583defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4584defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4585defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4586
4587defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4588defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4589defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4590defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4591
4592defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4593defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4594defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4595defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4596
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4597defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4598defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4599defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4600
// Base record for two-component 1D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and the
//          values $r, $g of register class `intype`
//   asm  : `inst` followed by "[$s, {$x}], {$r, $g}"
// The pattern list is empty, so no selection pattern is attached here.
4601class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4602    : NVPTXInst<(outs),
4603                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4604                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4605                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4606multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4607  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4608  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
4609}
4610
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored values.
4611defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4612defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4613defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4614defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4615
4616defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4617defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4618defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4619defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4620
4621defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4622defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4623defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4624defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4625
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4626defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4627defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4628defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4629
// Base record for four-component 1D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and the
//          values $r, $g, $b, $a of register class `intype`
//   asm  : `inst` followed by "[$s, {$x}], {$r, $g, $b, $a}"
// The pattern list is empty, so no selection pattern is attached here.
4630class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4631    : NVPTXInst<(outs),
4632                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4633                                intype:$b, intype:$a)),
4634                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4635                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4636multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4637  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4638  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
4639}
4640
// sust.b instantiations: b8/b16/b32 with .clamp/.trap/.zero suffixes (no b64
// form is defined for v4). b8 and b16 both use Int16Regs.
4641defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4642defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4643defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4644
4645defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4646defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4647defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4648
4649defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4650defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4651defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4652
// sust.p instantiations: only .trap forms are defined here.
4653defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4654defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4655defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4656
// Base record for scalar `sust.*.a1d` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx and $x,
//          and the value $r of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x}], {$r}" ($idx is printed first)
// The pattern list is empty, so no selection pattern is attached here.
4657class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4658    : NVPTXInst<(outs),
4659                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4660                inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4661                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4662multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4663  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4664  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4665}
4666
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored value.
4667defm SUST_B_1D_ARRAY_B8_CLAMP
4668  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4669defm SUST_B_1D_ARRAY_B16_CLAMP
4670  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4671defm SUST_B_1D_ARRAY_B32_CLAMP
4672  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4673defm SUST_B_1D_ARRAY_B64_CLAMP
4674  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4675
4676defm SUST_B_1D_ARRAY_B8_TRAP
4677  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4678defm SUST_B_1D_ARRAY_B16_TRAP
4679  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4680defm SUST_B_1D_ARRAY_B32_TRAP
4681  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4682defm SUST_B_1D_ARRAY_B64_TRAP
4683  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4684
4685defm SUST_B_1D_ARRAY_B8_ZERO
4686  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4687defm SUST_B_1D_ARRAY_B16_ZERO
4688  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4689defm SUST_B_1D_ARRAY_B32_ZERO
4690  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4691defm SUST_B_1D_ARRAY_B64_ZERO
4692  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4693
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4694defm SUST_P_1D_ARRAY_B8_TRAP
4695  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4696defm SUST_P_1D_ARRAY_B16_TRAP
4697  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4698defm SUST_P_1D_ARRAY_B32_TRAP
4699  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4700
// Base record for two-component `sust.*.a1d.v2` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx and $x,
//          and the values $r, $g of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x}], {$r, $g}"
// The pattern list is empty, so no selection pattern is attached here.
4701class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4702    : NVPTXInst<(outs),
4703                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4704                                intype:$r, intype:$g)),
4705                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4706                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4707multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4708  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4709  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4710}
4711
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored values.
4712defm SUST_B_1D_ARRAY_V2B8_CLAMP
4713  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4714defm SUST_B_1D_ARRAY_V2B16_CLAMP
4715  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4716defm SUST_B_1D_ARRAY_V2B32_CLAMP
4717  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4718defm SUST_B_1D_ARRAY_V2B64_CLAMP
4719  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4720
4721defm SUST_B_1D_ARRAY_V2B8_TRAP
4722  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4723defm SUST_B_1D_ARRAY_V2B16_TRAP
4724  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4725defm SUST_B_1D_ARRAY_V2B32_TRAP
4726  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4727defm SUST_B_1D_ARRAY_V2B64_TRAP
4728  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4729
4730defm SUST_B_1D_ARRAY_V2B8_ZERO
4731  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4732defm SUST_B_1D_ARRAY_V2B16_ZERO
4733  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4734defm SUST_B_1D_ARRAY_V2B32_ZERO
4735  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4736defm SUST_B_1D_ARRAY_V2B64_ZERO
4737  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4738
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4739defm SUST_P_1D_ARRAY_V2B8_TRAP
4740  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4741defm SUST_P_1D_ARRAY_V2B16_TRAP
4742  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4743defm SUST_P_1D_ARRAY_V2B32_TRAP
4744  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4745
// Base record for four-component `sust.*.a1d.v4` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx and $x,
//          and the values $r, $g, $b, $a of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x}], {$r, $g, $b, $a}"
// The pattern list is empty, so no selection pattern is attached here.
4746class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4747    : NVPTXInst<(outs),
4748                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4749                                intype:$r, intype:$g, intype:$b, intype:$a)),
4750                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4751                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4752multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4753  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4754  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4755}
4756
// sust.b instantiations: b8/b16/b32 with .clamp/.trap/.zero suffixes (no b64
// form is defined for v4). b8 and b16 both use Int16Regs.
4757defm SUST_B_1D_ARRAY_V4B8_CLAMP
4758  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4759defm SUST_B_1D_ARRAY_V4B16_CLAMP
4760  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4761defm SUST_B_1D_ARRAY_V4B32_CLAMP
4762  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4763
4764defm SUST_B_1D_ARRAY_V4B8_TRAP
4765  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4766defm SUST_B_1D_ARRAY_V4B16_TRAP
4767  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4768defm SUST_B_1D_ARRAY_V4B32_TRAP
4769  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4770
4771defm SUST_B_1D_ARRAY_V4B8_ZERO
4772  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4773defm SUST_B_1D_ARRAY_V4B16_ZERO
4774  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4775defm SUST_B_1D_ARRAY_V4B32_ZERO
4776  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4777
// sust.p instantiations: only .trap forms are defined here.
4778defm SUST_P_1D_ARRAY_V4B8_TRAP
4779  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4780defm SUST_P_1D_ARRAY_V4B16_TRAP
4781  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4782defm SUST_P_1D_ARRAY_V4B32_TRAP
4783  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4784
// Base record for scalar 2D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and $y,
//          and the value $r of register class `intype`
//   asm  : `inst` followed by "[$s, {$x, $y}], {$r}"
// The pattern list is empty, so no selection pattern is attached here.
4785class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4786    : NVPTXInst<(outs),
4787                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4788                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4789                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4790multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4791  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4792  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
4793}
4794
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored value.
4795defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4796defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4797defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4798defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4799
4800defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4801defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4802defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4803defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4804
4805defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4806defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4807defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4808defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4809
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4810defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4811defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4812defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4813
// Base record for two-component 2D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and $y,
//          and the values $r, $g of register class `intype`
//   asm  : `inst` followed by "[$s, {$x, $y}], {$r, $g}"
// The pattern list is empty, so no selection pattern is attached here.
4814class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4815    : NVPTXInst<(outs),
4816                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4817                                intype:$r, intype:$g)),
4818                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4819                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4820multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4821  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4822  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
4823}
4824
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored values.
4825defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4826defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4827defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4828defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4829
4830defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4831defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4832defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4833defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4834
4835defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4836defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4837defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4838defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4839
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4840defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4841defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4842defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4843
// Base record for four-component 2D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x and $y,
//          and the values $r, $g, $b, $a of register class `intype`
//   asm  : `inst` followed by "[$s, {$x, $y}], {$r, $g, $b, $a}"
// The pattern list is empty, so no selection pattern is attached here.
4844class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4845    : NVPTXInst<(outs),
4846                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4847                                intype:$r, intype:$g, intype:$b, intype:$a)),
4848                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4849                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4850multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4851  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4852  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
4853}
4854
// sust.b instantiations: b8/b16/b32 with .clamp/.trap/.zero suffixes (no b64
// form is defined for v4). b8 and b16 both use Int16Regs.
4855defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4856defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4857defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4858
4859defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4860defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4861defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4862
4863defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4864defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4865defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4866
// sust.p instantiations: only .trap forms are defined here.
4867defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4868defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4869defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4870
// Base record for scalar `sust.*.a2d` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx, $x,
//          $y, and the value $r of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x, $y, $y}], {$r}"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $y is printed twice, giving a four-element coordinate list;
// presumably padding for the a2d geometry - confirm against the PTX ISA.
4871class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4872    : NVPTXInst<(outs),
4873                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4874                                intype:$r)),
4875                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4876                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4877multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4878  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4879  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4880}
4881
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored value.
4882defm SUST_B_2D_ARRAY_B8_CLAMP
4883  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4884defm SUST_B_2D_ARRAY_B16_CLAMP
4885  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4886defm SUST_B_2D_ARRAY_B32_CLAMP
4887  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4888defm SUST_B_2D_ARRAY_B64_CLAMP
4889  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4890
4891defm SUST_B_2D_ARRAY_B8_TRAP
4892  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4893defm SUST_B_2D_ARRAY_B16_TRAP
4894  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4895defm SUST_B_2D_ARRAY_B32_TRAP
4896  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4897defm SUST_B_2D_ARRAY_B64_TRAP
4898  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4899
4900defm SUST_B_2D_ARRAY_B8_ZERO
4901  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4902defm SUST_B_2D_ARRAY_B16_ZERO
4903  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4904defm SUST_B_2D_ARRAY_B32_ZERO
4905  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4906defm SUST_B_2D_ARRAY_B64_ZERO
4907  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4908
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4909defm SUST_P_2D_ARRAY_B8_TRAP
4910  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4911defm SUST_P_2D_ARRAY_B16_TRAP
4912  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4913defm SUST_P_2D_ARRAY_B32_TRAP
4914  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4915
// Base record for two-component `sust.*.a2d.v2` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx, $x,
//          $y, and the values $r, $g of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x, $y, $y}], {$r, $g}"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $y is printed twice, giving a four-element coordinate list;
// presumably padding for the a2d geometry - confirm against the PTX ISA.
4916class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4917    : NVPTXInst<(outs),
4918                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4919                                intype:$r, intype:$g)),
4920                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4921                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4922multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4923  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4924  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4925}
4926
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored values.
4927defm SUST_B_2D_ARRAY_V2B8_CLAMP
4928  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4929defm SUST_B_2D_ARRAY_V2B16_CLAMP
4930  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4931defm SUST_B_2D_ARRAY_V2B32_CLAMP
4932  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4933defm SUST_B_2D_ARRAY_V2B64_CLAMP
4934  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4935
4936defm SUST_B_2D_ARRAY_V2B8_TRAP
4937  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4938defm SUST_B_2D_ARRAY_V2B16_TRAP
4939  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4940defm SUST_B_2D_ARRAY_V2B32_TRAP
4941  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4942defm SUST_B_2D_ARRAY_V2B64_TRAP
4943  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4944
4945defm SUST_B_2D_ARRAY_V2B8_ZERO
4946  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4947defm SUST_B_2D_ARRAY_V2B16_ZERO
4948  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4949defm SUST_B_2D_ARRAY_V2B32_ZERO
4950  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4951defm SUST_B_2D_ARRAY_V2B64_ZERO
4952  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4953
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
4954defm SUST_P_2D_ARRAY_V2B8_TRAP
4955  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4956defm SUST_P_2D_ARRAY_V2B16_TRAP
4957  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4958defm SUST_P_2D_ARRAY_V2B32_TRAP
4959  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4960
// Base record for four-component `sust.*.a2d.v4` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $idx, $x,
//          $y, and the values $r, $g, $b, $a of register class `intype`
//   asm  : `inst` followed by "[$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a}"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $y is printed twice, giving a four-element coordinate list;
// presumably padding for the a2d geometry - confirm against the PTX ISA.
4961class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4962    : NVPTXInst<(outs),
4963                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4964                                intype:$r, intype:$g, intype:$b, intype:$a)),
4965                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4966                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
4967multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4968  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4969  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4970}
4971
// sust.b instantiations: b8/b16/b32 with .clamp/.trap/.zero suffixes (no b64
// form is defined for v4). b8 and b16 both use Int16Regs.
4972defm SUST_B_2D_ARRAY_V4B8_CLAMP
4973  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4974defm SUST_B_2D_ARRAY_V4B16_CLAMP
4975  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4976defm SUST_B_2D_ARRAY_V4B32_CLAMP
4977  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4978
4979defm SUST_B_2D_ARRAY_V4B8_TRAP
4980  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4981defm SUST_B_2D_ARRAY_V4B16_TRAP
4982  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4983defm SUST_B_2D_ARRAY_V4B32_TRAP
4984  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4985
4986defm SUST_B_2D_ARRAY_V4B8_ZERO
4987  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4988defm SUST_B_2D_ARRAY_V4B16_ZERO
4989  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4990defm SUST_B_2D_ARRAY_V4B32_ZERO
4991  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4992
// sust.p instantiations: only .trap forms are defined here.
4993defm SUST_P_2D_ARRAY_V4B8_TRAP
4994  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4995defm SUST_P_2D_ARRAY_V4B16_TRAP
4996  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4997defm SUST_P_2D_ARRAY_V4B32_TRAP
4998  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4999
// Base record for scalar 3D `sust` instructions.
//   outs : none
//   ins  : the handle operand(s) passed in `surf`, then Int32Regs $x, $y, $z,
//          and the value $r of register class `intype`
//   asm  : `inst` followed by "[$s, {$x, $y, $z, $z}], {$r}"
// The pattern list is empty, so no selection pattern is attached here.
// NOTE(review): $z is printed twice, giving a four-element coordinate list;
// presumably padding for the 3d geometry - confirm against the PTX ISA.
5000class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
5001    : NVPTXInst<(outs),
5002                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5003                                intype:$r)),
5004                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5005                []>;
// _R takes the surface handle $s in Int64Regs, _I takes it as an i64imm.
5006multiclass SUST_3D<string inst, NVPTXRegClass intype> {
5007  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
5008  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
5009}
5010
// sust.b instantiations: b8/b16/b32/b64 with .clamp/.trap/.zero suffixes.
// b8 and b16 both use Int16Regs for the stored value.
5011defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
5012defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
5013defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
5014defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
5015
5016defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
5017defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
5018defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
5019defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
5020
5021defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
5022defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
5023defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
5024defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
5025
// sust.p instantiations: only .trap, and only b8/b16/b32, are defined here.
5026defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
5027defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
5028defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
5029
5030class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
5031    : NVPTXInst<(outs),
5032                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5033                                intype:$r, intype:$g)),
5034                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
5035                []>;
// Defines the two forms of a two-value (v2) 3D surface store built on
// SUST_3D_V2_base. They differ only in how the $s operand is supplied:
//   _R : $s is an Int64Regs register.
//   _I : $s is an i64imm immediate.
5036multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
5037  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
5038  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
5039}
5040
// Two-value (v2) 3D surface stores; each defm yields an _R and an _I record.
// The b8 and b16 forms both take their values in Int16Regs, b32 in Int32Regs
// and b64 in Int64Regs. sust.b is instantiated for .clamp, .trap and .zero
// with b8/b16/b32/b64; sust.p is instantiated for .trap only, with
// b8/b16/b32 (no b64).
5041defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
5042defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
5043defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
5044defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
5045
5046defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
5047defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
5048defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
5049defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
5050
5051defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
5052defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
5053defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
5054defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
5055
5056defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
5057defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
5058defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
5059
// Base instruction for a four-value (v4) 3D surface store.
//   inst   : assembly mnemonic, prepended to the operand string.
//   intype : register class of the four stored values $r, $g, $b and $a.
//   surf   : dag declaring the $s operand; !con joins it in front of the
//            $x/$y/$z (Int32Regs) and value operands.
// The instruction has no outputs and an empty selection-pattern list.
// NOTE(review): the asm string prints $z twice, giving a four-entry vector
// {$x, $y, $z, $z}. Presumably PTX wants four components for 3D surface
// coordinates and ignores the last one - confirm against the PTX ISA.
5060class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
5061    : NVPTXInst<(outs),
5062                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5063                                intype:$r, intype:$g, intype:$b, intype:$a)),
5064                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
5065                []>;
// Defines the two forms of a four-value (v4) 3D surface store built on
// SUST_3D_V4_base. They differ only in how the $s operand is supplied:
//   _R : $s is an Int64Regs register.
//   _I : $s is an i64imm immediate.
5066multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
5067  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
5068  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
5069}
5070
// Four-value (v4) 3D surface stores; each defm yields an _R and an _I record.
// The b8 and b16 forms both take their values in Int16Regs, b32 in Int32Regs.
// Unlike the scalar and v2 groups there are no b64 instantiations here.
// sust.b is instantiated for .clamp, .trap and .zero; sust.p for .trap only.
5071defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
5072defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
5073defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
5074
5075defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
5076defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
5077defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
5078
5079defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
5080defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
5081defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
5082
5083defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
5084defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
5085defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5086
5087}
5088
5089// Surface store instruction patterns
5090// These patterns are kept separate from the instruction definitions: placing
5091// them inline makes TableGen report type errors (root cause not yet known).
5092
5093// .clamp variant
// Selection patterns for the int_nvvm_sust_b_1d_*_clamp intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_1D_*_CLAMP_R instruction: $s (Int64Regs), $x (Int32Regs),
// then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5094def : Pat<(int_nvvm_sust_b_1d_i8_clamp
5095           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5096          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5097
5098def : Pat<(int_nvvm_sust_b_1d_i16_clamp
5099           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5100          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5101
5102def : Pat<(int_nvvm_sust_b_1d_i32_clamp
5103           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5104          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5105
5106def : Pat<(int_nvvm_sust_b_1d_i64_clamp
5107           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5108          (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5109
5110def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
5111           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5112          (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5113           Int16Regs:$r, Int16Regs:$g)>;
5114
5115def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
5116           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5117          (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5118           Int16Regs:$r, Int16Regs:$g)>;
5119
5120def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
5121           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5122          (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5123           Int32Regs:$r, Int32Regs:$g)>;
5124
5125def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
5126           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5127          (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5128           Int64Regs:$r, Int64Regs:$g)>;
5129
5130def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
5131           Int64Regs:$s, Int32Regs:$x,
5132           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5133          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5134           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5135
5136def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
5137           Int64Regs:$s, Int32Regs:$x,
5138           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5139          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5140           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5141
5142def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
5143           Int64Regs:$s, Int32Regs:$x,
5144           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5145          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
5146           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5147
5148
5149
// Selection patterns for the int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_1D_ARRAY_*_CLAMP_R instruction: $s (Int64Regs), $l and $x
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// NOTE(review): $l is presumably the array layer index - confirm against the
// intrinsic definition.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5150def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
5151           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5152          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5153           Int16Regs:$r)>;
5154
5155def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
5156           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5157          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5158           Int16Regs:$r)>;
5159
5160def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
5161           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5162          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5163           Int32Regs:$r)>;
5164
5165def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
5166           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5167          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5168           Int64Regs:$r)>;
5169
5170def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
5171          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5172          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5173           Int16Regs:$r, Int16Regs:$g)>;
5174
5175def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
5176          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5177          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5178           Int16Regs:$r, Int16Regs:$g)>;
5179
5180def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
5181          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5182          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5183           Int32Regs:$r, Int32Regs:$g)>;
5184
5185def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
5186          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5187          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5188           Int64Regs:$r, Int64Regs:$g)>;
5189
5190def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
5191           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5192           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5193          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5194           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5195
5196def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
5197           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5198           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5199          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5200           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5201
5202def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
5203           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5204           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5205          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5206           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5207
5208
5209
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_2D_*_CLAMP_R instruction: $s (Int64Regs), $x and $y
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5210def : Pat<(int_nvvm_sust_b_2d_i8_clamp
5211           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5212          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5213           Int16Regs:$r)>;
5214
5215def : Pat<(int_nvvm_sust_b_2d_i16_clamp
5216           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5218           Int16Regs:$r)>;
5219
5220def : Pat<(int_nvvm_sust_b_2d_i32_clamp
5221           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5222          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5223           Int32Regs:$r)>;
5224
5225def : Pat<(int_nvvm_sust_b_2d_i64_clamp
5226           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5227          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5228           Int64Regs:$r)>;
5229
5230def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
5231          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5232          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5233           Int16Regs:$r, Int16Regs:$g)>;
5234
5235def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
5236          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5237          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5238           Int16Regs:$r, Int16Regs:$g)>;
5239
5240def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
5241          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5242          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5243           Int32Regs:$r, Int32Regs:$g)>;
5244
5245def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
5246          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5247          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5248           Int64Regs:$r, Int64Regs:$g)>;
5249
5250def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
5251           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5252           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5253          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5254           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5255
5256def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
5257           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5258           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5259          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5260           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5261
5262def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
5263           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5264           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5265          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5266           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5267
5268
5269
// Selection patterns for the int_nvvm_sust_b_2d_array_*_clamp intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_2D_ARRAY_*_CLAMP_R instruction: $s (Int64Regs), $l, $x and
// $y (Int32Regs), then one, two (v2) or four (v4) value registers
// $r/$g/$b/$a.
// NOTE(review): $l is presumably the array layer index - confirm against the
// intrinsic definition.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5270def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
5271          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5272          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
5273           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5274           Int16Regs:$r)>;
5275
5276def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
5277          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5278          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
5279           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5280           Int16Regs:$r)>;
5281
5282def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
5283          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5284          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
5285           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5286           Int32Regs:$r)>;
5287
5288def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
5289          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5290          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
5291           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5292           Int64Regs:$r)>;
5293
5294def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
5295           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5296           Int16Regs:$r, Int16Regs:$g),
5297          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5298           Int32Regs:$x, Int32Regs:$y,
5299           Int16Regs:$r, Int16Regs:$g)>;
5300
5301def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
5302           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5303           Int16Regs:$r, Int16Regs:$g),
5304          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5305           Int32Regs:$x, Int32Regs:$y,
5306           Int16Regs:$r, Int16Regs:$g)>;
5307
5308def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
5309           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5310           Int32Regs:$g),
5311          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5312           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5313
5314def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
5315           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5316           Int64Regs:$g),
5317          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5318           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5319
5320def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
5321           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5322           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5323          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
5324           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5325           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5326
5327def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
5328           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5329           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5330          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
5331           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5332           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5333
5334def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
5335           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5336           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5337          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5338           Int32Regs:$x, Int32Regs:$y,
5339           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5340
5341
5342
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_3D_*_CLAMP_R instruction: $s (Int64Regs), $x, $y and $z
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5343def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5344           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5345           Int16Regs:$r),
5346          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5347           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5348           Int16Regs:$r)>;
5349
5350def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5351           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352           Int16Regs:$r),
5353          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5354           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5355           Int16Regs:$r)>;
5356
5357def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5358           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5359           Int32Regs:$r),
5360          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5361           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5362           Int32Regs:$r)>;
5363
5364def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5365           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5366           Int64Regs:$r),
5367          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5368           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5369           Int64Regs:$r)>;
5370
5371def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5372           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5373           Int16Regs:$r, Int16Regs:$g),
5374          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5375           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5376           Int16Regs:$r, Int16Regs:$g)>;
5377
5378def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5379           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5380           Int16Regs:$r, Int16Regs:$g),
5381          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5382           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5383           Int16Regs:$r, Int16Regs:$g)>;
5384
5385def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5386           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5387           Int32Regs:$r, Int32Regs:$g),
5388          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5389           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5390           Int32Regs:$r, Int32Regs:$g)>;
5391
5392def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5393           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5394           Int64Regs:$r, Int64Regs:$g),
5395          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5396           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5397           Int64Regs:$r, Int64Regs:$g)>;
5398
5399def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5400           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5401           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5402          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5403           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5404           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5405
5406def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5407           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5408           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5409          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5410           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5411           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5412
5413def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5414           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5415           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5416          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5417           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5418           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5419
5420
5421// .trap variant
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_1D_*_TRAP_R instruction: $s (Int64Regs), $x (Int32Regs),
// then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5422def : Pat<(int_nvvm_sust_b_1d_i8_trap
5423           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5424          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5425
5426def : Pat<(int_nvvm_sust_b_1d_i16_trap
5427           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5428          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5429
5430def : Pat<(int_nvvm_sust_b_1d_i32_trap
5431           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5432          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5433
5434def : Pat<(int_nvvm_sust_b_1d_i64_trap
5435           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5436          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5437
5438def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5439           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5440          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5441           Int16Regs:$r, Int16Regs:$g)>;
5442
5443def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5444           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5445          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5446           Int16Regs:$r, Int16Regs:$g)>;
5447
5448def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5449           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5450          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5451           Int32Regs:$r, Int32Regs:$g)>;
5452
5453def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5454           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5455          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5456           Int64Regs:$r, Int64Regs:$g)>;
5457
5458def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5459           Int64Regs:$s, Int32Regs:$x,
5460           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5461          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5462           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5463
5464def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5465           Int64Regs:$s, Int32Regs:$x,
5466           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5468           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5469
5470def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5471           Int64Regs:$s, Int32Regs:$x,
5472           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5473          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5474           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5475
5476
5477
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_1D_ARRAY_*_TRAP_R instruction: $s (Int64Regs), $l and $x
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// NOTE(review): $l is presumably the array layer index - confirm against the
// intrinsic definition.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5478def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5479           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5480          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5481           Int16Regs:$r)>;
5482
5483def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5484           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5485          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5486           Int16Regs:$r)>;
5487
5488def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5489           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5490          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5491           Int32Regs:$r)>;
5492
5493def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5494           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5495          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5496           Int64Regs:$r)>;
5497
5498def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5499          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5500          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5501           Int16Regs:$r, Int16Regs:$g)>;
5502
5503def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5504          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5505          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5506           Int16Regs:$r, Int16Regs:$g)>;
5507
5508def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5509          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5510          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5511           Int32Regs:$r, Int32Regs:$g)>;
5512
5513def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5514          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5515          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5516           Int64Regs:$r, Int64Regs:$g)>;
5517
5518def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5519           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5520           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5521          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5522           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5523
5524def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5525           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5526           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5527          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5528           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5529
5530def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5531           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5532           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5533          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5534           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5535
5536
5537
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_2D_*_TRAP_R instruction: $s (Int64Regs), $x and $y
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5538def : Pat<(int_nvvm_sust_b_2d_i8_trap
5539           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5540          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5541           Int16Regs:$r)>;
5542
5543def : Pat<(int_nvvm_sust_b_2d_i16_trap
5544           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5546           Int16Regs:$r)>;
5547
5548def : Pat<(int_nvvm_sust_b_2d_i32_trap
5549           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5550          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5551           Int32Regs:$r)>;
5552
5553def : Pat<(int_nvvm_sust_b_2d_i64_trap
5554           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5555          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5556           Int64Regs:$r)>;
5557
5558def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5559          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5560          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5561           Int16Regs:$r, Int16Regs:$g)>;
5562
5563def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5564          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5565          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5566           Int16Regs:$r, Int16Regs:$g)>;
5567
5568def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5569          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5570          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5571           Int32Regs:$r, Int32Regs:$g)>;
5572
5573def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5574          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5575          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5576           Int64Regs:$r, Int64Regs:$g)>;
5577
5578def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5579           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5580           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5581          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5582           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5583
5584def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5585           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5586           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5587          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5588           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5589
5590def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5591           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5592           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5593          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5594           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5595
5596
5597
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics.
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_2D_ARRAY_*_TRAP_R instruction: $s (Int64Regs), $l, $x and
// $y (Int32Regs), then one, two (v2) or four (v4) value registers
// $r/$g/$b/$a.
// NOTE(review): $l is presumably the array layer index - confirm against the
// intrinsic definition.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. This group has no v4i64 pattern, and only the _R instruction
// forms are targeted.
5598def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5599          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5600          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5601           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5602           Int16Regs:$r)>;
5603
5604def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5605          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5606          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5607           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5608           Int16Regs:$r)>;
5609
5610def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5611          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5612          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5613           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5614           Int32Regs:$r)>;
5615
5616def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5617          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5618          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5619           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5620           Int64Regs:$r)>;
5621
5622def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5623           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5624           Int16Regs:$r, Int16Regs:$g),
5625          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5626           Int32Regs:$x, Int32Regs:$y,
5627           Int16Regs:$r, Int16Regs:$g)>;
5628
5629def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5630           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5631           Int16Regs:$r, Int16Regs:$g),
5632          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5633           Int32Regs:$x, Int32Regs:$y,
5634           Int16Regs:$r, Int16Regs:$g)>;
5635
5636def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5637           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5638           Int32Regs:$g),
5639          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5640           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5641
5642def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5643           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5644           Int64Regs:$g),
5645          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5646           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5647
5648def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5649           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5650           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5651          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5652           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5653           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5654
5655def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5656           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5657           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5658          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5659           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5660           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5661
5662def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5663           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5664           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5665          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5666           Int32Regs:$x, Int32Regs:$y,
5667           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5668
5669
5670
// Selection patterns for the int_nvvm_sust_b_3d_*_trap intrinsics (scalar,
// v2 and the v4i8 case).
// Each pattern forwards its operands unchanged and in the same order to the
// matching SUST_B_3D_*_TRAP_R instruction: $s (Int64Regs), $x, $y and $z
// (Int32Regs), then one, two (v2) or four (v4) value registers $r/$g/$b/$a.
// i8 and i16 values both use Int16Regs, i32 uses Int32Regs and i64 uses
// Int64Regs. Only the _R instruction forms are targeted.
5671def : Pat<(int_nvvm_sust_b_3d_i8_trap
5672           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5673           Int16Regs:$r),
5674          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5675           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5676           Int16Regs:$r)>;
5677
5678def : Pat<(int_nvvm_sust_b_3d_i16_trap
5679           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5680           Int16Regs:$r),
5681          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5682           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5683           Int16Regs:$r)>;
5684
5685def : Pat<(int_nvvm_sust_b_3d_i32_trap
5686           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5687           Int32Regs:$r),
5688          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5689           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5690           Int32Regs:$r)>;
5691
5692def : Pat<(int_nvvm_sust_b_3d_i64_trap
5693           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5694           Int64Regs:$r),
5695          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5696           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5697           Int64Regs:$r)>;
5698
5699def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5700           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5701           Int16Regs:$r, Int16Regs:$g),
5702          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5703           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5704           Int16Regs:$r, Int16Regs:$g)>;
5705
5706def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5707           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5708           Int16Regs:$r, Int16Regs:$g),
5709          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5710           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5711           Int16Regs:$r, Int16Regs:$g)>;
5712
5713def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5714           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5715           Int32Regs:$r, Int32Regs:$g),
5716          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5717           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5718           Int32Regs:$r, Int32Regs:$g)>;
5719
5720def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5721           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5722           Int64Regs:$r, Int64Regs:$g),
5723          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5724           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5725           Int64Regs:$r, Int64Regs:$g)>;
5726
5727def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5728           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5729           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5730          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5731           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5732           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5733
5734def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5735           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5736           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5737          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5738           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5739           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5740
5741def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5742           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5743           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5744          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5745           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5746           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5747
5748
// .zero variant
//
// Selection patterns for the int_nvvm_sust_b_1d_*_zero intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_B_1D_*_ZERO_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32, i64.
foreach width = [8, 16, 32, 64] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r),
            (!cast<Instruction>("SUST_B_1D_B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32, v2i64.
foreach width = [8, 16, 32, 64] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v2i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_B_1D_V2B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v4i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_B_1D_V4B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
5803
5804
5805
// Selection patterns for the int_nvvm_sust_b_1d_array_*_zero intrinsics when
// the surface handle $s is in a 64-bit register. Each intrinsic is rewritten
// to the SUST_B_1D_ARRAY_*_ZERO_R instruction with the same element layout,
// and all operands are forwarded in their original order. The three loops
// expand, in order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32, i64.
foreach width = [8, 16, 32, 64] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, dataRC:$r),
            (!cast<Instruction>("SUST_B_1D_ARRAY_B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32, v2i64.
foreach width = [8, 16, 32, 64] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V2B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V4B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
5863
5864
5865
// Selection patterns for the int_nvvm_sust_b_2d_*_zero intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_B_2D_*_ZERO_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32, i64.
foreach width = [8, 16, 32, 64] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, dataRC:$r),
            (!cast<Instruction>("SUST_B_2D_B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32, v2i64.
foreach width = [8, 16, 32, 64] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_B_2D_V2B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_B_2D_V4B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
5923
5924
5925
// Selection patterns for the int_nvvm_sust_b_2d_array_*_zero intrinsics when
// the surface handle $s is in a 64-bit register. Each intrinsic is rewritten
// to the SUST_B_2D_ARRAY_*_ZERO_R instruction with the same element layout,
// and all operands are forwarded in their original order. The three loops
// expand, in order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32, i64.
foreach width = [8, 16, 32, 64] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r),
            (!cast<Instruction>("SUST_B_2D_ARRAY_B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32, v2i64.
foreach width = [8, 16, 32, 64] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
5996
5997
5998
// Selection patterns for the int_nvvm_sust_b_3d_*_zero intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_B_3D_*_ZERO_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32, i64.
foreach width = [8, 16, 32, 64] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r),
            (!cast<Instruction>("SUST_B_3D_B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32, v2i64.
foreach width = [8, 16, 32, 64] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_B_3D_V2B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # width # "_zero")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_B_3D_V4B" # width # "_ZERO_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6075
6076
6077
6078
// Selection patterns for the int_nvvm_sust_p_1d_*_trap intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_P_1D_*_TRAP_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32.
foreach width = [8, 16, 32] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r),
            (!cast<Instruction>("SUST_P_1D_B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_P_1D_V2B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_P_1D_V4B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6123
6124
6125
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics when
// the surface handle $s is in a 64-bit register. Each intrinsic is rewritten
// to the SUST_P_1D_ARRAY_*_TRAP_R instruction with the same element layout,
// and all operands are forwarded in their original order. The three loops
// expand, in order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32.
foreach width = [8, 16, 32] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, dataRC:$r),
            (!cast<Instruction>("SUST_P_1D_ARRAY_B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V2B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V4B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6173
6174
6175
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_P_2D_*_TRAP_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32.
foreach width = [8, 16, 32] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, dataRC:$r),
            (!cast<Instruction>("SUST_P_2D_B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v2i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_P_2D_V2B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_v4i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_P_2D_V4B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6223
6224
6225
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics when
// the surface handle $s is in a 64-bit register. Each intrinsic is rewritten
// to the SUST_P_2D_ARRAY_*_TRAP_R instruction with the same element layout,
// and all operands are forwarded in their original order. The three loops
// expand, in order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32.
foreach width = [8, 16, 32] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r),
            (!cast<Instruction>("SUST_P_2D_ARRAY_B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v2i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_P_2D_ARRAY_V2B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_2d_array_v4i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_P_2D_ARRAY_V4B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6284
6285
6286
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics when the
// surface handle $s is in a 64-bit register. Each intrinsic is rewritten to
// the SUST_P_3D_*_TRAP_R instruction with the same element layout, and all
// operands are forwarded in their original order. The three loops expand, in
// order, to the one-, two- and four-element forms.

// One data operand: i8, i16, i32.
foreach width = [8, 16, 32] in {
  // i8 data is carried in the same 16-bit register class as i16 data.
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r),
            (!cast<Instruction>("SUST_P_3D_B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r)>;
}

// Two data operands: v2i8, v2i16, v2i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_v2i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g),
            (!cast<Instruction>("SUST_P_3D_V2B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g)>;
}

// Four data operands: v4i8, v4i16, v4i32.
foreach width = [8, 16, 32] in {
  defvar dataRC =
      !cast<RegisterClass>("Int" # !if(!eq(width, 8), 16, width) # "Regs");
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_3d_v4i" # width # "_trap")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a),
            (!cast<Instruction>("SUST_P_3D_V4B" # width # "_TRAP_R")
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             dataRC:$r, dataRC:$g, dataRC:$b, dataRC:$a)>;
}
6349
6350//-----------------------------------
6351// Read Special Registers
6352//-----------------------------------
6353
// Instruction template that reads a special register into a 64-bit register.
//   regname - register name placed after '%' in the emitted
//             "mov.u64 \t$d, %<regname>;" text.
//   intop   - argument-less intrinsic whose result is selected to this
//             instruction.
//   Preds   - predicates the instruction requires; empty by default.
class PTX_READ_SREG_R64<string regname, Intrinsic intop,
                        list<Predicate> Preds = []>
    : NVPTXInst<(outs Int64Regs:$d), (ins),
                "mov.u64 \t$d, %" # regname # ";",
                [(set Int64Regs:$d, (intop))]>,
      Requires<Preds>;
6359
// Instruction that copies the 32-bit special register %<regname> into $d
// with a single "mov.u32". It takes no inputs and is selected for the
// zero-operand intrinsic `intop`; `Preds` (empty by default) is forwarded to
// Requires<>.
6360class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
6361  : NVPTXInst<(outs Int32Regs:$d), (ins),
6362              "mov.u32 \t$d, %" # regname # ";",
6363              [(set Int32Regs:$d, (intop))]>,
6364    Requires<Preds>;
6365
// Defines four PTX_READ_SREG_R32 instructions, <NAME>_x, <NAME>_y, <NAME>_z
// and <NAME>_w, one per component of the special register <regname>.
//   regname - base register name; each instruction reads "<regname>.<suffix>".
//   Preds   - predicates applied to all four instructions (none by default).
6366multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
6367   foreach suffix = ["x", "y", "z", "w"] in {
      // Register name as printed in the asm string, e.g. "tid.x".
6368      defvar reg = regname # "." # suffix;
      // The matching intrinsic record is looked up by its constructed name,
      // e.g. int_nvvm_read_ptx_sreg_tid_x.
6369      defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
6370      def "_"#suffix :  PTX_READ_SREG_R32<reg, intr, Preds>;
6371   }
6372}
6373
6374// TODO Add read vector-version of special registers
6375
// Four-component (x/y/z/w) special registers with no extra predicates.
6376defm INT_PTX_SREG_TID   : PTX_READ_SREG_R32V4<"tid">;
6377defm INT_PTX_SREG_NTID  : PTX_READ_SREG_R32V4<"ntid">;
6378defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
6379defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
6380
// Cluster-related four-component registers; all of them are guarded by
// hasSM<90> and hasPTX<78>.
6381defm INT_PTX_SREG_CLUSTERID :
6382       PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
6383defm INT_PTX_SREG_NCLUSTERID :
6384       PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
6385defm INT_PTX_SREG_CLUSTER_CTAID :
6386       PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
6387defm INT_PTX_SREG_CLUSTER_NCTAID:
6388       PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
6389
// Scalar cluster registers, under the same hasSM<90>/hasPTX<78> guard.
6390def  INT_PTX_SREG_CLUSTER_CTARANK :
6391       PTX_READ_SREG_R32<"cluster_ctarank",
6392                         int_nvvm_read_ptx_sreg_cluster_ctarank,
6393                         [hasSM<90>, hasPTX<78>]>;
6394def  INT_PTX_SREG_CLUSTER_NCTARANK:
6395       PTX_READ_SREG_R32<"cluster_nctarank",
6396                         int_nvvm_read_ptx_sreg_cluster_nctarank,
6397                         [hasSM<90>, hasPTX<78>]>;
6398
6399
// Scalar 32-bit special registers with no extra predicates.
6400def INT_PTX_SREG_LANEID :
6401    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6402def INT_PTX_SREG_WARPID :
6403    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6404def INT_PTX_SREG_NWARPID :
6405    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6406def INT_PTX_SREG_SMID :
6407    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6408def INT_PTX_SREG_NSMID :
6409    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6410def INT_PTX_SREG_GRIDID :
6411    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6412
// Lane-mask registers (%lanemask_eq/le/lt/ge/gt), also read as 32-bit values.
6413def INT_PTX_SREG_LANEMASK_EQ :
6414    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6415def INT_PTX_SREG_LANEMASK_LE :
6416    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6417def INT_PTX_SREG_LANEMASK_LT :
6418    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6419def INT_PTX_SREG_LANEMASK_GE :
6420    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6421def INT_PTX_SREG_LANEMASK_GT :
6422    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6423
// Clock and timer registers. These are the only special-register reads in
// this section that are marked hasSideEffects.
// NOTE(review): presumably this keeps separate reads from being merged or
// reordered, since each read may return a different value -- confirm.
6424let hasSideEffects = 1 in {
6425def INT_PTX_SREG_CLOCK :
6426    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6427def INT_PTX_SREG_CLOCK64 :
6428    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6429def INT_PTX_SREG_GLOBALTIMER :
6430    PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
6431}
6432
// The generic i64 readcyclecounter and readsteadycounter nodes are selected
// to reads of %clock64 and %globaltimer respectively.
6433def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
6434def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
6435
// 32-bit reads of the special registers %pm0 .. %pm3, one instruction and one
// intrinsic per register.
6436def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6437def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6438def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6439def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6440
6441// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
6442// prints a '%'-prefixed register name, while WARP_SZ is a constant that is
// printed without the '%' prefix. Hence the instruction is spelled out.
6443def INT_PTX_SREG_WARPSIZE :
6444    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6445              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6446
6447// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6448// In addition to target-independent fields provided by WMMA_REGS, it adds
6449// the fields commonly used to implement specific PTX instruction -- register
6450// types and names, constraints, parts of assembly, etc.
//
// Parameters:
//   r  - WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with (the users in this
//        file pass "load", "store", "wmma.mma", "mma" and "ldmatrix"). It is
//        only consulted when selecting Predicates.
6451class WMMA_REGINFO<WMMA_REGS r, string op>
6452      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6453  // NVPTX register types used to carry fragment data.
  // f32 and f64 use the float register classes; every other element type is
  // carried in Int32Regs. An unlisted element type matches no entry.
6454  NVPTXRegClass regclass = !cond(
6455    !eq(ptx_elt_type, "f16") : Int32Regs,
6456    !eq(ptx_elt_type, "f32") : Float32Regs,
6457    !eq(ptx_elt_type, "f64") : Float64Regs,
6458    !eq(ptx_elt_type, "bf16") : Int32Regs,
6459    !eq(ptx_elt_type, "tf32") : Int32Regs,
6460    !eq(ptx_elt_type, "s32") : Int32Regs,
6461    !eq(ptx_elt_type, "b16") : Int32Regs,
6462    !eq(ptx_elt_type, "s8") : Int32Regs,
6463    !eq(ptx_elt_type, "u8") : Int32Regs,
6464    !eq(ptx_elt_type, "s4") : Int32Regs,
6465    !eq(ptx_elt_type, "u4") : Int32Regs,
6466    !eq(ptx_elt_type, "b1") : Int32Regs);
6467
6468  // Instruction input/output arguments for the fragment.
  // One `regclass` entry per element of the inherited `regs` list.
6469  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6470
6471  // List of register names for the fragment -- ["ra0", "ra1",...]
6472  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6473
6474  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6475  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6476
6477  // Predicates for particular fragment variant. Technically those are
6478  // per-instruction predicates, but currently all fragments that can be used in
6479  // a given instruction are subject to the same constraints, so an instruction
6480  // can use predicates from any of its fragments. If/when this is no
6481  // longer the case, we can concat all per-fragment predicates to enforce that
6482  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the first condition that holds, so the
  // order of the entries matters. Each condition should be listed exactly
  // once; a later duplicate of an earlier condition can never be selected.
6483  list<Predicate> Predicates = !cond(
6484    // fp16 -> fp16/fp32 @ m16n16k16
6485    !and(!eq(geom, "m16n16k16"),
6486         !or(!eq(ptx_elt_type, "f16"),
6487             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
6488
    // f64 @ m8n8k4. Listed ahead of the non-f64 m8n8k4 entry further down.
6489    !and(!eq(geom,"m8n8k4"),
6490         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
6491
6492    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6493    !and(!or(!eq(geom, "m8n32k16"),
6494             !eq(geom, "m32n8k16")),
6495         !or(!eq(ptx_elt_type, "f16"),
6496             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
6497
6498    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6499    !and(!or(!eq(geom,"m16n16k16"),
6500             !eq(geom,"m8n32k16"),
6501             !eq(geom,"m32n8k16")),
6502         !or(!eq(ptx_elt_type, "u8"),
6503             !eq(ptx_elt_type, "s8"),
6504             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
6505
    // bf16 @ m16n16k16/m8n32k16/m32n8k16
6506    !and(!or(!eq(geom,"m16n16k16"),
6507             !eq(geom,"m8n32k16"),
6508             !eq(geom,"m32n8k16")),
6509         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
6510
    // tf32 @ m16n16k8
6511    !and(!eq(geom,"m16n16k8"),
6512         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
6513
    // f32 @ m16n16k8
6514    !and(!eq(geom,"m16n16k8"),
6515         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
6516
6517    // b1 -> s32 @ m8n8k128(b1)
    // Applies to every op except "mma"; mma @ m8n8k128 is handled further down.
6518    !and(!ne(op,"mma"),
6519         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
6520
6521    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    // Applies to every op except "mma"; mma @ m8n8k32 is handled further down.
6522    !and(!ne(op,"mma"),
6523         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
6524
    // m16n8k8/m8n8k16, regardless of element type and op.
6525    !or(!eq(geom,"m16n8k8"),
6526        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
6527
    // Any non-f64 element type @ m8n8k4 (f64 is matched near the top).
6528    !and(!ne(ptx_elt_type,"f64"),
6529         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
6530
6531    // mma m8n8k32 requires higher PTX version
6532    !and(!eq(op,"mma"),
6533         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
6537
    // Geometries handled here only for op == "mma". m8n8k128 with any other
    // op has already been matched by the b1 entry above.
6538    !and(!eq(op,"mma"),
6539         !or(!eq(geom, "m16n8k16"),
6540             !eq(geom, "m16n8k4"),
6541             !eq(geom, "m16n8k32"),
6542             !eq(geom, "m16n8k64"),
6543             !eq(geom, "m8n8k128"),
6544             !eq(geom, "m16n8k128"),
6545             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
6546
    // ldmatrix b16 @ m8n8
6547    !and(!eq(op,"ldmatrix"),
6548         !eq(ptx_elt_type,"b16"),
6549         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
6550
6551  // template DAGs for instruction inputs/output.
  // Both pair ptx_regs[i] with reg_names[i]; only the dag operator differs.
6552  dag Outs = !dag(outs, ptx_regs, reg_names);
6553  dag Ins = !dag(ins, ptx_regs, reg_names);
6554}
6555
6556// Convert dag of arguments into a dag to match given intrinsic.
// The `ins` operator of `Ins` is replaced by `Intr`, and the memory operand
// classes MEMri, MEMri64 and imem are replaced by ADDRri, ADDRri64 and
// ADDRvar respectively. All other operands and all operand names are kept.
// The result is available as the `ret` field.
6557class BuildPatternI<Intrinsic Intr, dag Ins> {
6558  // Build a dag pattern that matches the intrinsic call.
6559  dag ret = !foreach(tmp, Ins,
6560                          !subst(imem, ADDRvar,
6561                          !subst(MEMri64, ADDRri64,
6562                          !subst(MEMri, ADDRri,
6563                          !subst(ins, Intr, tmp)))));
6564}
6565
6566// Same as BuildPatternI, but uses PatFrag instead of an Intrinsic.
// The `ins` operator of `Ins` is replaced by the PatFrag `Intr`, and MEMri,
// MEMri64 and imem are replaced by ADDRri, ADDRri64 and ADDRvar respectively.
6567class BuildPatternPF<PatFrag Intr, dag Ins> {
6568  // Build a dag pattern that matches the PatFrag.
6569  dag ret = !foreach(tmp, Ins,
6570                          !subst(imem, ADDRvar,
6571                          !subst(MEMri64, ADDRri64,
6572                          !subst(MEMri, ADDRri,
6573                          !subst(ins, Intr, tmp)))));
6574}
6575
6576// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, given as a string.
//   _Args - list of (ins ...) dags that together form the input operands.
// The NVPTXInst base is instantiated with placeholder operands, a "?" asm
// string and no pattern; the subclasses in this file override
// OutOperandList, InOperandList and AsmString with `let`.
6577class WMMA_INSTR<string _Intr, list<dag> _Args>
6578  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
6579  Intrinsic Intr = !cast<Intrinsic>(_Intr);
6580  // Concatenate all arguments into a single dag.
6581  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6582  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6583  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6584}
6585
6586//
6587// wmma.load.[a|b|c].sync.[row|col].<geom>[|.global|.shared].<type>
// <geom> and <type> come from the fragment (Frag.geom, Frag.ptx_elt_type).
//
// Parameters:
//   Frag       - fragment being loaded; provides outputs, names, predicates.
//   Layout     - "row" or "col".
//   Space      - "", ".global" or ".shared"; printed verbatim and used to pick
//                the address-space check.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and printed.
//   SrcOp      - operand class used for the $src address.
6588//
6589
6590class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6591                DAGOperand SrcOp>
6592  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6593                              [!con((ins SrcOp:$src),
6594                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6595    Requires<Frag.Predicates> {
6596  // Load/store intrinsics are overloaded on pointer's address space.
6597  // To match the right intrinsic, we need to build AS-constrained PatFrag.
6598  // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6599  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  // Same operands, but with the intrinsic as the dag operator.
6600  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6601  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" uses the generic check.
6602  PatFrag IntrFrag = PatFrag<PFOperands,
6603                             PFOperandsIntr,
6604                             !cond(!eq(Space, ".shared"): AS_match.shared,
6605                                   !eq(Space, ".global"): AS_match.global,
6606                                   true: AS_match.generic)>;
6607  // Build AS-constrained pattern.
6608  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6609
6610  let OutOperandList = Frag.Outs;
  // The trailing MmaCode:$ptx operand is not part of the intrinsic's
  // arguments; it is referenced by "${ptx:aligned}" in the asm string.
6611  let InOperandList = !con(Args, (ins MmaCode:$ptx));
6612  let AsmString = "wmma.load."
6613                  # Frag.frag
6614                  # ".sync"
6615                  # "${ptx:aligned}"
6616                  # "." # Layout
6617                  # "." # Frag.geom
6618                  # Space
6619                  # "." # Frag.ptx_elt_type # " \t"
6620                  # Frag.regstring
6621                  # ", [$src]"
6622                  # !if(WithStride, ", $ldm", "")
6623                  # ";";
6624}
6625
6626//
6627// wmma.store.d.sync.[row|col].<geom>[|.global|.shared].<type>
// <geom> and <type> come from the fragment (Frag.geom, Frag.ptx_elt_type).
//
// Parameters:
//   Frag       - fragment being stored; its registers are instruction inputs.
//   Layout     - "row" or "col".
//   Space      - "", ".global" or ".shared"; printed verbatim and used to pick
//                the address-space check.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and printed.
//   DstOp      - operand class used for the $dst address.
6628//
6629class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6630                   bit WithStride, DAGOperand DstOp>
6631  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6632               [!con((ins DstOp:$dst),
6633                     Frag.Ins,
6634                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6635    Requires<Frag.Predicates> {
6636
6637  // Load/store intrinsics are overloaded on pointer's address space.
6638  // To match the right intrinsic, we need to build AS-constrained PatFrag.
6639  // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  // It consists of $dst, one node per fragment register (named as in
  // Frag.reg_names), and $ldm when WithStride is set.
6640  dag PFOperands = !con((ops node:$dst),
6641                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6642                        !if(WithStride, (ops node:$ldm), (ops)));
6643  // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with `ops` replaced by the intrinsic.
6644  PatFrag IntrFrag = PatFrag<PFOperands,
6645                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6646                             !cond(!eq(Space, ".shared"): AS_match.shared,
6647                                   !eq(Space, ".global"): AS_match.global,
6648                                   true: AS_match.generic)>;
6649  // Build AS-constrained pattern.
6650  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6651
  // The trailing MmaCode:$ptx operand is not part of the intrinsic's
  // arguments; it is referenced by "${ptx:aligned}" in the asm string.
6652  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // A store produces no register results.
6653  let OutOperandList = (outs);
6654  let AsmString = "wmma.store.d.sync"
6655                  # "${ptx:aligned}"
6656                  # "." # Layout
6657                  # "." # Frag.geom
6658                  # Space
6659                  # "." # Frag.ptx_elt_type
6660                  # " \t[$dst],"
6661                  # Frag.regstring
6662                  # !if(WithStride, ", $ldm", "")
6663                  # ";";
6664}
6665
6666// Create all load/store variants
// The anonymous records are collected into the list MMA_LDSTs. One
// WMMA_LOAD / WMMA_STORE_D is defined for every combination of layout,
// stride flag, address space and address operand class, for each fragment
// that NVVM_WMMA_LDST_SUPPORTED accepts with that layout.
6667defset list<WMMA_INSTR> MMA_LDSTs  = {
6668  foreach layout = ["row", "col"] in {
6669    foreach stride = [false, true] in {
      // "" selects the generic address-space check in WMMA_LOAD/WMMA_STORE_D.
6670      foreach space = [".global", ".shared", ""] in {
6671        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6672          foreach frag = NVVM_MMA_OPS.all_ld_ops in
6673            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6674              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
6675          foreach frag = NVVM_MMA_OPS.all_st_ops in
6676            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6677              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6678        } // addr
6679      } // space
6680    } // stride
6681  } // layout
6682} // defset
6683
6684// B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list, extended with hasSM<80> and hasPTX<71>
// when b1op is ".and.popc"; for any other b1op nothing is added.
6685class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // The two template arguments are also kept as fields.
6686  string Op = b1op;
6687  WMMA_REGINFO Frag = FragA;
6688  list<Predicate> ret = !listconcat(
6689    FragA.Predicates,
6690    !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6691  );
6692}
6693// WMMA.MMA
// Instruction for a wmma.mma intrinsic variant.
//   FragA/FragB/FragC - input fragments, in that operand order.
//   FragD             - result fragment.
//   ALayout/BLayout   - "row" or "col" for the A and B fragments.
//   Satfinite         - non-zero appends ".satfinite" to the mnemonic.
//   rnd               - rounding-mode suffix without the dot, or "" for none.
//   b1op              - suffix printed right after "wmma.mma"; it also adds
//                       predicates via MMA_OP_PREDICATES.
6694class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6695               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6696               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6697  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6698                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
6699    // Requires does not seem to have effect on Instruction w/o Patterns.
6700    // We set it here anyways and propagate to the Pat<> we construct below.
6701    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6702  let OutOperandList = FragD.Outs;
  // The trailing MmaCode:$ptx operand is not part of the intrinsic's
  // arguments; it is referenced by "${ptx:aligned}" in the asm string.
6703  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix: for f16 A fragments only the D and C element types are
  // printed; in every other case D, A, B and C are printed in that order.
6704  string TypeList = !cond(
6705    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6706                                     # "." # FragC.ptx_elt_type,
6707    true: "." # FragD.ptx_elt_type
6708          # "." # FragA.ptx_elt_type
6709          # "." # FragB.ptx_elt_type
6710          # "." # FragC.ptx_elt_type
6711  );
6712  let AsmString = "wmma.mma"
6713                  # b1op
6714                  # ".sync"
6715                  # "${ptx:aligned}"
6716                  # "." # ALayout
6717                  # "." # BLayout
6718                  # "." # FragA.geom
6719                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
6720                  # TypeList
6721                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6722                  # FragD.regstring # ",\n\t\t"
6723                  # FragA.regstring # ",\n\t\t"
6724                  # FragB.regstring # ",\n\t\t"
6725                  # FragC.regstring # ";";
6726}
6727
// Create all wmma.mma variants. The anonymous records are collected into the
// list WMMAs and are all marked isConvergent. A WMMA_MMA is defined for each
// combination of layouts, satfinite flag, rounding mode, op and b1op that
// NVVM_WMMA_SUPPORTED accepts.
6728let isConvergent = true in {
6729defset list<WMMA_INSTR> WMMAs  = {
6730  foreach layout_a = ["row", "col"] in {
6731    foreach layout_b = ["row", "col"] in {
6732      foreach satf = [0, 1] in {
        // "" means no rounding-mode suffix is printed.
6733        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6734          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6735            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6736              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                // op[0..3] are passed as the A, B, C and D fragments.
6737                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6738                              WMMA_REGINFO<op[1], "wmma.mma">,
6739                              WMMA_REGINFO<op[2], "wmma.mma">,
6740                              WMMA_REGINFO<op[3], "wmma.mma">,
6741                              layout_a, layout_b, satf, rnd, b1op>;
6742              }
6743            } // b1op
6744          } // op
6745        } // rnd
6746      } // satf
6747    } // layout_b
6748  } // layout_a
6749} // defset
6750}
6751
6752// MMA
// Instruction for an mma.sync intrinsic variant.
//   FragA/FragB/FragC - input fragments, in that operand order.
//   FragD             - result fragment.
//   ALayout/BLayout   - "row" or "col" for the A and B fragments.
//   Satfinite         - non-zero inserts ".satfinite" before the type list.
//   b1op              - suffix printed after the type list; it also adds
//                       predicates via MMA_OP_PREDICATES.
6753class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6754               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6755               string ALayout, string BLayout, int Satfinite, string b1op>
6756  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6757                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
6758    // Requires does not seem to have effect on Instruction w/o Patterns.
6759    // We set it here anyways and propagate to the Pat<> we construct below.
6760  Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6761  let OutOperandList = FragD.Outs;
  // MmaCode:$ptx is appended to the intrinsic's arguments. This asm string
  // spells ".aligned" literally and does not reference $ptx.
6762  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // All four element types are always printed, in D, A, B, C order.
6763  string TypeList = "." # FragD.ptx_elt_type
6764                    # "." # FragA.ptx_elt_type
6765                    # "." # FragB.ptx_elt_type
6766                    # "." # FragC.ptx_elt_type;
6767  let AsmString = "mma.sync.aligned."
6768                  # FragA.geom
6769                  # "." # ALayout
6770                  # "." # BLayout
6771                  # !if(Satfinite, ".satfinite", "")
6772                  # TypeList
6773                  # b1op # "\n\t\t"
6774                  # FragD.regstring # ",\n\t\t"
6775                  # FragA.regstring # ",\n\t\t"
6776                  # FragB.regstring # ",\n\t\t"
6777                  # FragC.regstring # ";";
6778}
6779
// Create all mma variants. The anonymous records are collected into the list
// MMAs and are all marked isConvergent. An MMA is defined for each
// combination of layouts, satfinite flag, op and b1op that
// NVVM_MMA_SUPPORTED accepts.
6780let isConvergent = true in {
6781defset list<WMMA_INSTR> MMAs  = {
6782  foreach layout_a = ["row", "col"] in {
6783    foreach layout_b = ["row", "col"] in {
6784      foreach satf = [0, 1] in {
6785        foreach op = NVVM_MMA_OPS.all_mma_ops in {
6786          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6787            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              // op[0..3] are passed as the A, B, C and D fragments.
6788              def : MMA<WMMA_REGINFO<op[0], "mma">,
6789                        WMMA_REGINFO<op[1], "mma">,
6790                        WMMA_REGINFO<op[2], "mma">,
6791                        WMMA_REGINFO<op[3], "mma">,
6792                        layout_a, layout_b, satf, b1op>;
6793            }
6794          } // b1op
6795        } // op
6796      } // satf
6797    } // layout_b
6798  } // layout_a
6799} // defset
6800}
6801
6802//
6803// ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
//
// Parameters:
//   Frag       - fragment being loaded; provides outputs, names, predicates.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - "" or ".shared"; printed verbatim and used to pick the
//                address-space check.
//   SrcOp      - operand class used for the $src address.
6804//
6805class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6806               DAGOperand SrcOp>
6807  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6808    Requires<Frag.Predicates> {
6809  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" uses the generic check.
6810  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6811                             !cond(!eq(Space, ".shared"): AS_match.shared,
6812                                   true: AS_match.generic)>;
6813  // Build AS-constrained pattern.
6814  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6815
6816  let OutOperandList = Frag.Outs;
  // MmaCode:$ptx is appended to the intrinsic's arguments. This asm string
  // spells ".aligned" literally and does not reference $ptx.
6817  let InOperandList = !con(Args, (ins MmaCode:$ptx));
6818  let AsmString = "ldmatrix.sync.aligned."
6819                  # Frag.geom
6820                  # "." # Frag.frag
6821                  # !if(Transposed, ".trans", "")
6822                  # Space
6823                  # "." # Frag.ptx_elt_type
6824                  # " " # Frag.regstring # ", [$src];";
6825}
6826
6827// Create all ldmatrix variants
// The anonymous records are collected into the list LDMATRIXs. One LDMATRIX
// is defined for every combination of transposition flag, address space and
// address operand class, for each fragment NVVM_LDMATRIX_SUPPORTED accepts.
6828defset list<WMMA_INSTR> LDMATRIXs  = {
6829  foreach transposed = [false, true] in {
    // "" selects the generic address-space check in LDMATRIX.
6830    foreach space = [".shared", ""] in {
6831      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6832        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6833          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6834            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6835                            addr>;
6836      } // addr
6837    } // space
6838  } // transposed
6839} // defset
6840
// Selection pattern for one WMMA_INSTR record `wi`: matches
// wi.IntrinsicPattern and emits `wi` with the operands of wi.Args followed by
// ptx.version, which fills the instruction's trailing $ptx operand. The
// instruction's predicates are copied onto the pattern.
6841// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6842// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
6843// the instruction record.
6844class MMA_PAT<WMMA_INSTR wi>
6845      : Pat<wi.IntrinsicPattern,
6846            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6847                 (wi ptx.version))>,
6848        Requires<wi.Predicates>;
6849
6850// Build intrinsic->instruction patterns for all MMA instructions.
// This covers every record collected by the four defsets MMAs, WMMAs,
// MMA_LDSTs and LDMATRIXs.
6851foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6852  def : MMA_PAT<mma>;
6853
// Defines the four "mapa<suffix>" instruction forms selected for intrinsic
// `Intr`, all guarded by hasSM<90> and hasPTX<78>:
//   _32  / _32i - 32-bit $a and result, $b in a register / as an immediate.
//   _64  / _64i - 64-bit $a and result, $b in a register / as an immediate.
// $b is always a 32-bit value.
6854multiclass MAPA<string suffix, Intrinsic Intr> {
6855  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
6856              "mapa" # suffix # ".u32\t$d, $a, $b;",
6857              [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
6858    Requires<[hasSM<90>, hasPTX<78>]>;
6859  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
6860              "mapa" # suffix # ".u32\t$d, $a, $b;",
6861              [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
6862    Requires<[hasSM<90>, hasPTX<78>]>;
6863  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
6864              "mapa" # suffix # ".u64\t$d, $a, $b;",
6865              [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
6866    Requires<[hasSM<90>, hasPTX<78>]>;
6867  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
6868              "mapa" # suffix # ".u64\t$d, $a, $b;",
6869              [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
6870    Requires<[hasSM<90>, hasPTX<78>]>;
6871}
6872
// Plain "mapa" and the ".shared::cluster" variant.
6873defm mapa  : MAPA<"", int_nvvm_mapa>;
6874defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
6875
6876
// Defines the two "getctarank<suffix>" instruction forms selected for
// intrinsic `Intr`, both guarded by hasSM<90> and hasPTX<78>. The ".u32" /
// ".u64" type follows the width of the $a operand; the result $d is a 32-bit
// register in both forms.
6877multiclass GETCTARANK<string suffix, Intrinsic Intr> {
6878  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
6879              "getctarank" # suffix # ".u32\t$d, $a;",
6880              [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
6881    Requires<[hasSM<90>, hasPTX<78>]>;
6882  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
6883              "getctarank" # suffix # ".u64\t$d, $a;",
6884              [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
6885    Requires<[hasSM<90>, hasPTX<78>]>;
6886}
6887
// Plain "getctarank" and the ".shared::cluster" variant.
6888defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
6889defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
6890
// Reads the predicate special register %is_explicit_cluster into a 1-bit
// register with "mov.pred"; selected for int_nvvm_is_explicit_cluster and
// guarded by hasSM<90> and hasPTX<78>.
6891def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
6892              "mov.pred\t$d, %is_explicit_cluster;",
6893              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
6894    Requires<[hasSM<90>, hasPTX<78>]>;
6895
6896// setmaxnreg inc/dec intrinsics
// Both instructions are marked isConvergent and require hasSM90a and
// hasPTX<80>.
6897let isConvergent = true in {
// Defines one anonymous "setmaxnreg.<Action>.sync.aligned.u32" instruction
// selected for `Intr`. It has no outputs; the register count is an immediate
// operand (matched with timm).
6898multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
6899  def : NVPTXInst<(outs), (ins i32imm:$reg_count),
6900          "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
6901          [(Intr timm:$reg_count)]>,
6902    Requires<[hasSM90a, hasPTX<80>]>;
6903}
6904
6905defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
6906defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
6907
6908} // isConvergent
6909
// "exit;" instruction with no operands, selected for int_nvvm_exit.
6910def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;
6911