xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision da759cfa320d5076b075d15ff3f00ab3ba5634fd)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// Predicate code fragments, one per address space.  Each fragment forwards
// the node being matched to ChkMemSDNodeAddressSpace together with the
// address-space constant it is named after.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides `ptx.version`: a dag that is rewritten, during selection, into an
// i32 immediate holding the PTX version reported by the subtarget.
class PTX {
  // Ignores the value of the incoming immediate and produces the subtarget's
  // PTX version instead.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    SDLoc DL(N);
    return getI32Imm(Subtarget->getPTXVersion(), DL);
  }]>;

  // The (i32 0) operand is only a placeholder for PTXVerXform to replace.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Builds a list of n sequentially numbered names sharing a common prefix.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//
// The list is built recursively: the names for n-1 come first, followed by
// prefix # (n-1).  n == 0 ends the recursion with an empty list.
class RegSeq<int n, string prefix> {
  list<string> ret =
      !if(n,
          !listconcat(RegSeq<!add(n, -1), prefix>.ret,
                      [prefix # !add(n, -1)]),
          []);
}
58
// Values the `threadmask_imm` loop variable of the shfl instantiation loop
// iterates over: both 0 and 1 when `sync` is set, otherwise only 0.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = 1 in {
// bar.sync on the fixed barrier 0 (int_nvvm_barrier0).
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// bar.sync with one register operand (int_nvvm_barrier_n).
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two register operands (int_nvvm_barrier).
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: the emitted block declares a scratch predicate,
// sets it from ($pred != 0), and has bar.red.popc.u32 write $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: the emitted block sets %p1 from ($pred != 0), has
// bar.red.and.pred write %p2, and uses selp.u32 to turn %p2 into 1 or 0 in
// $dst.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: the emitted block sets %p1 from ($pred != 0), has
// bar.red.or.pred write %p2, and uses selp.u32 to turn %p2 into 1 or 0 in
// $dst.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync with an immediate operand (int_nvvm_bar_sync).
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync (int_nvvm_bar_warp_sync), in immediate (_I) and register (_R)
// operand forms.  Both are gated on hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
111
// barrier.sync with a single operand (int_nvvm_barrier_sync), in immediate
// (_I) and register (_R) forms.  Both are gated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
118
// barrier.sync with an id and a count operand (int_nvvm_barrier_sync_cnt).
// The two-letter suffix gives the operand kinds in ($id, $cnt) order:
// R = register, I = immediate.  All forms are gated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
135
// One shfl / shfl.sync instruction variant.
//
//   sync           - 1 selects the shfl.sync form, which takes an extra
//                    leading $threadmask operand.
//   mode           - mode string spliced into the intrinsic name and the
//                    mnemonic.
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - 1 adds an Int1Regs:$pred result, printed as "$dst|$pred".
//   offset_imm,
//   mask_imm,
//   threadmask_imm - 1 makes the corresponding operand an i32imm instead of
//                    an Int32Regs register.
//
// The base class is instantiated with placeholder operands, asm string and
// pattern; all four are overridden below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Register class used for $src and $dst.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Intrinsic record name: int_nvvm_shfl_[sync_]<mode>_<reg>[p].
  string IntrName = "int_nvvm_shfl_" # !if(sync, "sync_", "") # mode # "_"
                    # reg # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Operand order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]),
               ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // In the printed form the thread mask, when present, comes last.
  let AsmString = "shfl." # !if(sync, "sync.", "") # mode # ".b32\t$dst"
                  # !if(return_pred, "|$pred", "")
                  # ", $src, $offset, $mask"
                  # !if(sync, ", $threadmask", "")
                  # ";";

  // The pattern is derived from the operand lists: `outs` becomes `set`,
  // `ins` becomes the intrinsic, and every i32imm operand becomes `imm`.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
               !subst(outs, set,
               !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp)))))];
}
175
// Instantiate SHFL_INSTR for every combination of its parameters.  The
// threadmask_imm values come from THREADMASK_INFO<sync>, so the non-sync form
// is only instantiated with threadmask_imm = 0.  Every variant requires
// hasSM30; the non-sync variants additionally require hasSHFL.
foreach sync = [0, 1] in
  foreach mode = ["up", "down", "bfly", "idx"] in
    foreach regclass = ["i32", "f32"] in
      foreach return_pred = [0, 1] in
        foreach offset_imm = [0, 1] in
          foreach mask_imm = [0, 1] in
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
193
// vote.{all,any,uni,ballot}
//
// One anonymous instruction per instantiation: it takes a predicate and writes
// a result of class `regclass`.  `mode` is the mnemonic suffix after "vote.".
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest),
                  (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
// vote.sync.{all,any,uni,ballot}
//
// Two instructions per instantiation, differing only in the kind of the $mask
// operand: `i` takes it as an immediate, `r` as a register.  The mask is the
// first intrinsic operand but is printed last.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype>
//
// Four instructions per instantiation.  The second suffix letter gives the
// kind of the $mask operand and the first that of $value (i = immediate,
// r = register): `ir` is register mask with immediate value, `ri` the
// reverse.  ImmOp is the operand type used for an immediate $value.
// The mask is the first intrinsic operand but is printed last.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype>, in the form that also returns a predicate.
//
// Same four operand-kind variants as MATCH_ANY_SYNC (the second suffix letter
// is the $mask kind, the first the $value kind; i = immediate, r = register),
// but each instruction has two results, printed as "$dest|$pred".
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                         # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
277} // isConvergent = 1
278
279//-----------------------------------
280// Explicit Memory Fence Functions
281//-----------------------------------
// An operand-less instruction that prints StrOp verbatim and is selected for
// the operand-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
289
290
291//-----------------------------------
292// Math Functions
293//-----------------------------------
294
// Map min(1.0, max(0.0, x)) to sat(x), emitted as a cvt with the CvtSAT
// modifier.  All four operand orders of the nested min/max are listed.
//
// Note that the opposite nesting, max(0.0, min(x, 1.0)), cannot be mapped to
// sat(x): when x is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// f32
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
326
327
// One-operand math instruction selected for the intrinsic IntOP.
// OpcStr must be the complete asm string (mnemonic and operands) because we
// need to deal with cases like INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
335
// Two-operand math instruction selected for the intrinsic IntOP.
// OpcStr must be the complete asm string (mnemonic and operands) because we
// need to deal with cases like INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
344
// Three-operand math instruction selected for the intrinsic IntOP.
// OpcStr is the complete asm string (mnemonic and operands).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1,
                     s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;
353
354//
355// MISC
356//
357
// prmt.b32 on three 32-bit integer registers (int_nvvm_prmt).
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
360
361//
362// Min Max
363//
364
// f32 min, plain and .ftz.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min / max.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
379
380
381//
382// Multiplication
383//
384
// mul.hi on 32-bit integers, signed and unsigned.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit integers, signed and unsigned.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply for each rounding modifier (rn, rz, rm, rp), with and without
// .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply for each rounding modifier.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on 32-bit registers, signed and unsigned.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
425
426//
427// Div
428//
429
// f32 approximate divide, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide for each rounding modifier (rn, rz, rm, rp), with and without
// .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide for each rounding modifier.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
461
462//
463// Sad
464//
465
// sad on 32-bit registers, signed and unsigned.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
470
471//
472// Floor  Ceil
473//
474
// floor: selected as a same-width cvt with the CvtRMI / CvtRMI_FTZ modifier.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil: selected as a same-width cvt with the CvtRPI / CvtRPI_FTZ modifier.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
488
489//
490// Abs
491//
492
// f32 abs, with and without .ftz.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

// f64 abs.
def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
500
501//
502// Round
503//
504
// round: selected as a same-width cvt with the CvtRNI / CvtRNI_FTZ modifier.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
511
512//
513// Trunc
514//
515
// trunc: selected as a same-width cvt with the CvtRZI / CvtRZI_FTZ modifier.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
522
523//
524// Saturate
525//
526
// saturate: selected as a same-width cvt with the CvtSAT / CvtSAT_FTZ
// modifier.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f     Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d     Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
533
534//
535// Exp2  Log2
536//
537
// ex2.approx: f32 (.ftz and plain) and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

// lg2.approx: f32 (.ftz and plain) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
551
552//
553// Sin  Cos
554//
555
// sin.approx on f32, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx on f32, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
565
566//
567// Fma
568//
569
// f32 fma for each rounding modifier (rn, rz, rm, rp), with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

// f64 fma for each rounding modifier.
def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;
599
600//
601// Rcp
602//
603
// f32 rcp for each rounding modifier (rn, rz, rm, rp), with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp for each rounding modifier.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// f64 approximate rcp (.ftz form only).
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
632
633//
634// Sqrt
635//
636
// f32 sqrt for each rounding modifier (rn, rz, rm, rp), with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;

// f32 approximate sqrt, with and without .ftz.
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt for each rounding modifier.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
666
// nvvm_sqrt intrinsic
//
// int_nvvm_sqrt_f has no instruction of its own; it is rewritten to one of
// four f32 sqrt instructions depending on the doF32FTZ and do_SQRTF32_RN
// predicates.  The last pattern carries no predicate.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
676
677//
678// Rsqrt
679//
680
// rsqrt.approx: f32 (.ftz and plain) and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
688
689//
690// Add
691//
692
// f32 add with an explicit rounding mode: {rn, rz, rm, rp} x {ftz, non-ftz}.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
709
// f64 add with an explicit rounding mode (rn, rz, rm, rp).
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
718
719//
720// Convert
721//
722
// f64 -> f32. Each int_nvvm_d2f_<mode> intrinsic is selected to CVT_f32_f64
// with the Cvt* mode operand of the same name.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$src), (CVT_f32_f64 Float64Regs:$src, CvtRP)>;
739
// f64 -> signed i32, using the integer-rounding (Cvt*I) modes.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$src), (CVT_s32_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$src), (CVT_s32_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$src), (CVT_s32_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$src), (CVT_s32_f64 Float64Regs:$src, CvtRPI)>;

// f64 -> unsigned i32.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$src), (CVT_u32_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$src), (CVT_u32_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$src), (CVT_u32_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$src), (CVT_u32_f64 Float64Regs:$src, CvtRPI)>;
757
// signed i32 -> f64.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$src), (CVT_f64_s32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$src), (CVT_f64_s32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$src), (CVT_f64_s32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$src), (CVT_f64_s32 Int32Regs:$src, CvtRP)>;

// unsigned i32 -> f64.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$src), (CVT_f64_u32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$src), (CVT_f64_u32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$src), (CVT_f64_u32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$src), (CVT_f64_u32 Int32Regs:$src, CvtRP)>;
775
// f32 -> signed i32. The _ftz intrinsics select the matching Cvt*I_FTZ mode.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$src), (CVT_s32_f32 Float32Regs:$src, CvtRPI)>;

// f32 -> unsigned i32.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$src), (CVT_u32_f32 Float32Regs:$src, CvtRPI)>;
809
// signed i32 -> f32.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$src), (CVT_f32_s32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$src), (CVT_f32_s32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$src), (CVT_f32_s32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$src), (CVT_f32_s32 Int32Regs:$src, CvtRP)>;

// unsigned i32 -> f32.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$src), (CVT_f32_u32 Int32Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$src), (CVT_f32_u32 Int32Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$src), (CVT_f32_u32 Int32Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$src), (CVT_f32_u32 Int32Regs:$src, CvtRP)>;
827
// Build an f64 register from two i32 registers with a single mov.b64 whose
// source is the vector operand {$src0, $src1}.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Extract one 32-bit half of an f64 register. Both forms unpack $src0 with
// mov.b64 inside a braced block that declares a scratch register %temp; the
// half that is not wanted lands in %temp.
//   D2I_LO: $dst is the first element of the destination pair.
//   D2I_HI: $dst is the second element of the destination pair.
def INT_NVVM_D2I_LO
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
  : F_MATH_1<!strconcat("{{\n\t",
                        ".reg .b32 %temp; \n\t",
                        "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                        "}}"),
             Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
843
// f32 -> signed i64.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$src), (CVT_s64_f32 Float32Regs:$src, CvtRPI)>;

// f32 -> unsigned i64.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$src), (CVT_u64_f32 Float32Regs:$src, CvtRPI)>;
877
// f64 -> signed i64.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$src), (CVT_s64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$src), (CVT_s64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$src), (CVT_s64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$src), (CVT_s64_f64 Float64Regs:$src, CvtRPI)>;

// f64 -> unsigned i64.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$src), (CVT_u64_f64 Float64Regs:$src, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$src), (CVT_u64_f64 Float64Regs:$src, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$src), (CVT_u64_f64 Float64Regs:$src, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$src), (CVT_u64_f64 Float64Regs:$src, CvtRPI)>;
895
// signed i64 -> f32.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$src), (CVT_f32_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$src), (CVT_f32_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$src), (CVT_f32_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$src), (CVT_f32_s64 Int64Regs:$src, CvtRP)>;

// unsigned i64 -> f32.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$src), (CVT_f32_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$src), (CVT_f32_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$src), (CVT_f32_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$src), (CVT_f32_u64 Int64Regs:$src, CvtRP)>;
913
// signed i64 -> f64.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$src), (CVT_f64_s64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$src), (CVT_f64_s64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$src), (CVT_f64_s64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$src), (CVT_f64_s64 Int64Regs:$src, CvtRP)>;

// unsigned i64 -> f64.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$src), (CVT_f64_u64 Int64Regs:$src, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$src), (CVT_f64_u64 Int64Regs:$src, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$src), (CVT_f64_u64 Int64Regs:$src, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$src), (CVT_f64_u64 Int64Regs:$src, CvtRP)>;
931
932
// f32 -> f16, round-to-nearest. The CVT_f16_f32 result is wrapped in
// BITCONVERT_16_F2I before being used as the intrinsic's result.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$src),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$src, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$src),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$src, CvtRN))>;
937
938//
939// Bitcast
940//
941
// 32-bit bitcasts between the float and integer register classes, emitted as
// an untyped mov.b32.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

// 64-bit counterparts, emitted as mov.b64.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;
951
952//
953// FNS
954//
955
// Common shape of every fns.b32 instruction: an i32 result computed from the
// $mask, $base and $offset operands.
//   ins      - input operand list; must name $mask, $base and $offset because
//              the asm string refers to them.
//   Operands - the DAG that is matched and whose value is assigned to $dst.
// Every instance requires hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst),
              ins,
              "fns.b32 \t$dst, $mask, $base, $offset;",
              [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX60, hasSM30]>;
961
// One fns.b32 variant per register/immediate combination of the operands.
// The three-letter suffix gives the kind of (mask, base, offset) in that
// order: 'r' = Int32Regs register, 'i' = i32 immediate.
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
978
979//-----------------------------------
980// Atomic Functions
981//-----------------------------------
982
// PatFrag wrappers that attach an address-space predicate to an atomic
// fragment. The predicate code comes from AS_match, which calls
// ChkMemSDNodeAddressSpace on the node for the global, shared or generic
// address space respectively.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
989
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class of the address operand.
//   regclass - register class of the result and of the register value.
//   SpaceStr, OpcStr, TypeStr - concatenated after "atom", in that order, to
//              form the mnemonic.
//   IntOp    - fragment matched as (IntOp addr, value).
//   IMMType / IMM - operand type and DAG leaf used by the immediate form.
//   Pred     - predicates required by both forms.
// Produces two records: `reg` (value in a register) and `imm` (value as an
// immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;

  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b;"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice, once with the address in Int32Regs
// (records prefixed `p32`) and once with it in Int64Regs (`p64`). Pred
// defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, IMM, Pred>;
}
1010
// Two-operand atomic that negates the value operand first.
// The emitted text is a braced PTX block that
//   1. declares a scratch register `temp` of type .s<TypeStr>,
//   2. writes neg.s<TypeStr> of $b into it, and
//   3. issues atom<SpaceStr><OpcStr>.u<TypeStr> on [$addr] with `temp`.
// TypeStr is therefore the bare bit width (e.g. "32"), without a leading
// type letter. Only a register form is produced; IMMType is accepted but not
// referenced in the body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                !strconcat("{{ \n\t",
                           ".reg \t.s", TypeStr, " temp; \n\t",
                           "neg.s", TypeStr, " \ttemp, $b; \n\t",
                           "atom", SpaceStr, OpcStr, ".u", TypeStr,
                           " \t$dst, [$addr], temp; \n\t",
                           "}}"),
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp for both pointer widths: `p32` takes the
// address in Int32Regs, `p64` in Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, IMMType, Pred>;
}
1033
// Three-operand atomic (address + two values, $b and $c) for a single pointer
// width. All four records print the same asm text and differ only in which of
// $b / $c is an immediate:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands are matched with the `imm` leaf and typed as IMMType.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                !strconcat("atom", SpaceStr, OpcStr, TypeStr,
                           " \t$dst, [$addr], $b, $c;"),
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp for both pointer widths: `p32` takes the
// address in Int32Regs, `p64` in Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, IntOp,
                     IMMType, Pred>;
}
1069
1070// atom_add
1071
// Address-space-qualified fragments for atomic add.
// Suffix: _g = global, _s = shared, _gen = generic address space.
// The 32/64 forms wrap the integer atomic_load_add_{32,64} nodes; the forms
// with no width in the name wrap the floating-point atomic_load_fadd node.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_32 node:$ptr, node:$val)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_add_64 node:$ptr, node:$val)>;
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_fadd node:$ptr, node:$val)>;
1090
// atom.add instructions.
// The *_GEN_*_USE_G variants match the generic-address-space fragment but
// print the ".global" state space.
// NOTE(review): they carry the same fragment and (empty) predicate list as
// the plain *_GEN_* form; which of the two gets selected cannot be determined
// from this part of the file.

// 32-bit integer.
defm INT_PTX_ATOM_ADD_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add", atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;

// 64-bit integer.
defm INT_PTX_ATOM_ADD_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add", atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;

// f32 (immediates are matched with fpimm).
defm INT_PTX_ATOM_ADD_G_F32 :
  F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add", atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 :
  F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 :
  F_ATOMIC_2<Float32Regs, "", ".f32", ".add", atomic_load_add_gen, f32imm, fpimm>;

// f64, gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 :
  F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add", atomic_load_add_g, f64imm, fpimm,
             [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 :
  F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add", atomic_load_add_s, f64imm, fpimm,
             [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 :
  F_ATOMIC_2<Float64Regs, "", ".f64", ".add", atomic_load_add_gen, f64imm, fpimm,
             [hasAtomAddF64]>;
1122
1123// atom_sub
1124
// Address-space-qualified fragments for integer atomic subtract
// (_g = global, _s = shared, _gen = generic).
def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_32 node:$ptr, node:$val)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_sub_64 node:$ptr, node:$val)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_sub_64 node:$ptr, node:$val)>;
1137
// Atomic subtract, built from F_ATOMIC_2_NEG with OpcStr ".add": the value is
// negated and then added. TypeStr is the bare width ("32"/"64") because the
// multiclass supplies the .s/.u type letters itself.
defm INT_PTX_ATOM_SUB_G_32 :
  F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64 :
  F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32 :
  F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G :
  F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32 :
  F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64 :
  F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64 :
  F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G :
  F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", atomic_load_sub_64_gen, i64imm>;
1154
1155// atom_swap
1156
// Address-space-qualified fragments for atomic swap
// (_g = global, _s = shared, _gen = generic).
def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_32 node:$ptr, node:$val)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_swap_64 node:$ptr, node:$val)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_swap_64 node:$ptr, node:$val)>;
1169
// atom.exch instructions, typed .b32 / .b64.
defm INT_PTX_ATOM_SWAP_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch", atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch", atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
1186
1187// atom_max
1188
// Address-space-qualified fragments for atomic max
// (_g = global, _s = shared, _gen = generic).
// atomic_load_max_* wrap the atomic_load_max_{32,64} nodes and
// atomic_load_umax_* wrap the atomic_load_umax_{32,64} nodes.
def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_32 node:$ptr, node:$val)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_max_64 node:$ptr, node:$val)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_32 node:$ptr, node:$val)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umax_64 node:$ptr, node:$val)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umax_64 node:$ptr, node:$val)>;
1213
// atom.max instructions. The max fragments use the .s32/.s64 type suffix and
// the umax fragments use .u32/.u64.
defm INT_PTX_ATOM_LOAD_MAX_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max", atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max", atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1246
1247// atom_min
1248
// Address-space-qualified fragments for atomic min
// (_g = global, _s = shared, _gen = generic).
// atomic_load_min_* wrap the atomic_load_min_{32,64} nodes and
// atomic_load_umin_* wrap the atomic_load_umin_{32,64} nodes.
def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_32 node:$ptr, node:$val)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_min_64 node:$ptr, node:$val)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_32 node:$ptr, node:$val)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (atomic_load_umin_64 node:$ptr, node:$val)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (atomic_load_umin_64 node:$ptr, node:$val)>;
1273
// atom.min instructions. The min fragments use the .s32/.s64 type suffix and
// the umin fragments use .u32/.u64.
defm INT_PTX_ATOM_LOAD_MIN_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min", atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 :
  F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 :
  F_ATOMIC_2<Int32Regs, "", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G :
  F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 :
  F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min", atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 :
  F_ATOMIC_2<Int64Regs, "", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G :
  F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1306
// atom_inc, atom_dec
1308
// Address-space-qualified fragments for atomic inc / dec
// (_g = global, _s = shared, _gen = generic). Unlike the other atomics in
// this section, these wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics
// rather than an atomic_load_* node, and only 32-bit forms are defined.
def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$ptr, node:$val),
                      (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$ptr, node:$val),
                       (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
1321
1322defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1323  atomic_load_inc_32_g, i32imm, imm>;
1324defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1325  atomic_load_inc_32_s, i32imm, imm>;
1326defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1327  atomic_load_inc_32_gen, i32imm, imm>;
1328defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1329  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1330defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1331  atomic_load_dec_32_g, i32imm, imm>;
1332defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1333  atomic_load_dec_32_s, i32imm, imm>;
1334defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1335  atomic_load_dec_32_gen, i32imm, imm>;
1336defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1337  ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1338
// atom_and
//
// Fragments wrapping atomic_load_and_{32,64} in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK classes.

def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;

// atom.and.b32
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;

// atom.and.b64
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm>;
1370
// atom_or
//
// Fragments wrapping atomic_load_or_{32,64} in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK classes.

def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;

// atom.or.b32 (definition order is global, generic, generic-as-global,
// shared).
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;

// atom.or.b64
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm>;
1402
// atom_xor
//
// Fragments wrapping atomic_load_xor_{32,64} in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK classes.

def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;

// atom.xor.b32
defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;

// atom.xor.b64
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm>;
1434
// atom_cas
//
// Three-operand fragments wrapping atomic_cmp_swap_{32,64} in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK classes.

def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

// atom.cas.b32
defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;

// atom.cas.b64
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
1466
// Support for scoped atomic operations.  Matches
//   int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.

// Common base for the two- and three-operand scoped atomics: one result in
// `regclass`, selected by the `Operands` dag and gated on `Preds`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins, AsmStr,
              [(set regclass:$result, Operands)]>,
    Requires<Preds>;
1479
// Define instruction variants for all addressing modes.
//
// Produces four anonymous two-operand instructions: a 32-bit or 64-bit
// address register in $src, combined with either a register or an immediate
// in $b.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register $b.  These two carry AddedComplexity = 1; the immediate forms
  // below keep the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int32Regs:$src, regclass:$b),
            (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int64Regs:$src, regclass:$b),
            (Intr Int64Regs:$src, regclass:$b)>;
  }

  // Immediate $b.  tablegen can't infer argument types from Intrinsic
  // (though it can from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins  Int32Regs:$src, ImmType:$b),
          (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins  Int64Regs:$src, ImmType:$b),
          (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
1503
// Three-operand counterpart of the two-operand addressing-mode multiclass.
// Produces eight anonymous instructions: 32-bit or 64-bit address register in
// $src, combined with every register/immediate permutation of $b and $c.
// AddedComplexity is 2 for reg/reg, 1 for the mixed forms and the default for
// imm/imm.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b register, $c register.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int32Regs:$src, regclass:$b, regclass:$c),
            (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int64Regs:$src, regclass:$b, regclass:$c),
            (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }

  let AddedComplexity = 1 in {
    // $b immediate, $c register.
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int32Regs:$src, ImmType:$b, regclass:$c),
            (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int64Regs:$src, ImmType:$b, regclass:$c),
            (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    // $b register, $c immediate.
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int32Regs:$src, regclass:$b, ImmType:$c),
            (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
            (ins  Int64Regs:$src, regclass:$b, ImmType:$c),
            (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }

  // $b immediate, $c immediate.
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins  Int32Regs:$src, ImmType:$b, ImmType:$c),
          (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
          (ins  Int64Regs:$src, ImmType:$b, ImmType:$c),
          (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1538
// Constructs intrinsic name and instruction asm strings.
//
// Asm string:     "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;"
// Intrinsic name: "int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]"
//
// The space qualifier is omitted for SpaceStr == "gen".  The scope qualifier
// is omitted from the asm string when ScopeStr is "gpu" or empty; the
// intrinsic name drops its scope suffix only when ScopeStr is empty.  An
// empty ScopeStr previously produced the malformed mnemonic "atom..<op>".
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "",
                                  !if(!eq(ScopeStr, ""), "", "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand variant: constructs the intrinsic name and the asm string
//   "atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b, $c;"
//
// The space qualifier is omitted for SpaceStr == "gen".  The scope qualifier
// is omitted from the asm string when ScopeStr is "gpu" or empty; the
// intrinsic name drops its scope suffix only when ScopeStr is empty.  An
// empty ScopeStr previously produced the malformed mnemonic "atom..<op>".
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "",
                                  !if(!eq(ScopeStr, ""), "", "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
1568
// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers, so the only
// instantiation forwards every argument with SpaceStr fixed to "gen".
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand address-space layer.  Only the generic-space variant is
// instantiated: every argument is forwarded with SpaceStr fixed to "gen".
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
1583
// Constructs variants for different scopes of atomic op.
// Both the "cta" and the "sys" variant add hasAtomScope to the caller's
// predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
// Three-operand scope layer.  Both the "cta" and the "sys" variant add
// hasAtomScope to the caller's predicate list.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics.  They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
1609
// atom.add
// Integer variants use intrinsic type tag "i", floating-point variants "f".
// Only the f64 variant adds an extra predicate (hasAtomAddF64).
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32
    : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
    : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                  [hasAtomAddF64]>;
}
1620
// atom.{and,or,xor}
// The 64-bit variant is additionally gated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                  [hasAtomBitwise64]>;
}
1627
// atom.exch
// 32- and 64-bit variants; neither adds a predicate of its own.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1633
// atom.{min,max}
// The 64-bit variants are additionally gated on hasAtomMinMax64.
//
// NOTE(review): the signed and unsigned variants pass the same intrinsic type
// tag ("i"), so for a given width both are built from the same intrinsic and
// differ only in the emitted PTX type suffix.  Confirm that unsigned min/max
// is meant to share the signed intrinsic.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64
    : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
}
1643
// atom.{inc,dec}
// Only an unsigned 32-bit variant is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}
1648
// atom.cas
// Three-operand compare-and-swap in 32- and 64-bit widths.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32
    : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1654
// Instantiate the scoped atomics, one family per operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
1665
1666//-----------------------------------
1667// Support for ldu on sm_20 or later
1668//-----------------------------------
1669
1670// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1671// read-only in a kernel.
1672
1673// Scalar
1674
// Scalar ldu.global.  TyStr supplies the type suffix and operand text that
// follows "ldu.global.".  One pattern-less instruction is defined per address
// operand kind (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64), each gated on
// hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
}
1692
// Scalar ldu instantiations.  The i8 form loads into Int16Regs; f16 and f16x2
// use the untyped .b16/.b32 suffixes; p32/p64 reuse the u32/u64 strings.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1703
1704// vector
1705
// Elementized vector ldu
// Two-element form: each instruction has two `regclass` results and no
// selection pattern.  One instruction per address operand kind.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1724
// Four-element elementized vector ldu: each instruction has four `regclass`
// results and no selection pattern.  One instruction per address operand kind.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1742
// Two-element vector ldu instantiations.  8-bit elements load into Int16Regs.
defm INT_PTX_LDU_G_v2i8_ELE :
  VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE :
  VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE :
  VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE :
  VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE :
  VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE :
  VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE :
  VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE :
  VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu instantiations.
defm INT_PTX_LDU_G_v4i8_ELE :
  VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE :
  VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE :
  VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE :
  VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE :
  VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE :
  VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
1776
1777
1778//-----------------------------------
1779// Support for ldg on sm_35 or later
1780//-----------------------------------
1781
1782// Don't annotate ld.global.nc as mayLoad, because these loads go through the
1783// non-coherent texture cache, and therefore the values read must be read-only
1784// during the lifetime of the kernel.
1785
// Scalar ld.global.nc.  TyStr supplies the type suffix and operand text that
// follows "ld.global.nc.".  One pattern-less instruction is defined per
// address operand kind (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64), each
// gated on hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}
1803
// Scalar ldg instantiations.  The i8 form loads into Int16Regs; f16 and f16x2
// use the untyped .b16/.b32 suffixes; p32/p64 reuse the u32/u64 strings.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
1824
1825// vector
1826
// Elementized vector ldg
// Two-element form: each instruction has two `regclass` results and no
// selection pattern.  One instruction per address operand kind.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
1845
// Four-element elementized vector ldg: each instruction has four `regclass`
// results and no selection pattern.  One instruction per address operand kind.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
1863
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper
// loads.

// Two-element vector ldg instantiations.  8-bit elements load into Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE :
  VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
  VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
  VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE :
  VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE :
  VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
  VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
  VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
  VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE :
  VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
  VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
  VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE :
  VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE :
  VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
  VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
1893
1894
// cvta.<Str>: selects `Intrin` to a "cvta.<Str>" instruction.
//   _yes      32-bit source and result.
//   _yes_64   64-bit source and result.
//   _yes_6432 32-bit source, 64-bit result; the source is first widened with
//             cvt.u64.u32 into a temporary.  Only under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvt.u64.u32 \t%tmp, $src;\n\t",
                           "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
1909
// cvta.to.<Str>: selects `Intrin` to a "cvta.to.<Str>" instruction.
//   _yes      32-bit source and result.
//   _yes_64   64-bit source and result.
//   _yes_3264 64-bit source, 32-bit result; the 64-bit conversion result is
//             narrowed with cvt.u32.u64 from a temporary.  Only under
//             useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           "  cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
1924
// Instantiate the address-space conversions for the local, shared, global
// and const state spaces.  The first group (NG_TO_G) binds the
// int_nvvm_ptr_<space>_to_gen intrinsics; the second group (G_TO_NG) binds
// int_nvvm_ptr_gen_to_<space>.  The "const" mnemonic is paired with the
// *_constant_* intrinsic names.
1925defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
1926defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
1927defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
1928defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
1929
1930defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
1931defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
1932defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
1933defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
1934
1935
1936// nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move (no cvta):
// mov.u32 for a 32-bit pointer and mov.u64 for a 64-bit pointer.
1937def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
1938  (ins Int32Regs:$src),
1939                        "mov.u32 \t$result, $src;",
1940                              [(set Int32Regs:$result,
1941                                (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
1942def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
1943  (ins Int64Regs:$src),
1944                        "mov.u64 \t$result, $src;",
1945                              [(set Int64Regs:$result,
1946                                (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
1947
1948
1949// nvvm.move intrinsics
// One register-to-register "mov" per int_nvvm_move_* intrinsic.  Source and
// destination always share a register class.  Integer moves use the untyped
// .b16/.b32/.b64 suffixes, floating-point moves use .f32/.f64, and the two
// int_nvvm_move_ptr variants (32- and 64-bit pointers) use .u32/.u64.
1950def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
1951                             "mov.b16 \t$r, $s;",
1952                             [(set Int16Regs:$r,
1953                               (int_nvvm_move_i16 Int16Regs:$s))]>;
1954def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1955                             "mov.b32 \t$r, $s;",
1956                             [(set Int32Regs:$r,
1957                               (int_nvvm_move_i32 Int32Regs:$s))]>;
1958def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1959                             "mov.b64 \t$r, $s;",
1960                             [(set Int64Regs:$r,
1961                               (int_nvvm_move_i64 Int64Regs:$s))]>;
1962def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
1963                             "mov.f32 \t$r, $s;",
1964                             [(set Float32Regs:$r,
1965                               (int_nvvm_move_float Float32Regs:$s))]>;
1966def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
1967                             "mov.f64 \t$r, $s;",
1968                             [(set Float64Regs:$r,
1969                               (int_nvvm_move_double Float64Regs:$s))]>;
1970def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
1971                             "mov.u32 \t$r, $s;",
1972                             [(set Int32Regs:$r,
1973                               (int_nvvm_move_ptr Int32Regs:$s))]>;
1974def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
1975                             "mov.u64 \t$r, $s;",
1976                             [(set Int64Regs:$r,
1977                               (int_nvvm_move_ptr Int64Regs:$s))]>;
1978
1979// @TODO: Are these actually needed, or will we always just see symbols
1980// copied to registers first?
1981/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
1982                             "mov.u32 \t$r, $s;",
1983                             [(set Int32Regs:$r,
1984                             (int_nvvm_move_ptr texternalsym:$s))]>;
1985def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
1986                             "mov.u64 \t$r, $s;",
1987                             [(set Int64Regs:$r,
1988                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
1989
1990
1991// MoveParam        %r1, param
1992// ptr_local_to_gen %r2, %r1
1993// ptr_gen_to_local %r3, %r2
1994// ->
1995// mov %r1, param
1996
1997// @TODO: Revisit this.  There is a type
1998// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
1999// instructions are not currently defined. However, we can use the ptr
2000// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol.  One pattern per result width: i64 selects nvvm_move_ptr64,
// i32 selects nvvm_move_ptr32.  The symbol operand is passed straight to the
// register-operand move instruction.
2001def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2002                (MoveParam texternalsym:$src)))),
2003               (nvvm_move_ptr64  texternalsym:$src)>;
2004def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2005                (MoveParam texternalsym:$src)))),
2006               (nvvm_move_ptr32  texternalsym:$src)>;
2007
// 64-bit move of an imem operand into a register.  The pattern list is
// empty, so this instruction is never chosen by a TableGen pattern; it has to
// be created explicitly elsewhere (NOTE(review): presumably by C++ selection
// code for texture/surface handles -- confirm).
2008def texsurf_handles
2009  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2010              "mov.u64 \t$result, $src;", []>;
2011
2012//-----------------------------------
2013// Compiler Error Warn
2014// - Just ignore them in codegen
2015//-----------------------------------
2016
// int_nvvm_compiler_warn / int_nvvm_compiler_error produce no real PTX: the
// assembly string of each instruction is just a "//" comment naming the
// intrinsic.  Each intrinsic gets a 32-bit and a 64-bit variant, differing
// only in the register class of the single operand $a.
2017def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2018                "// llvm.nvvm.compiler.warn()",
2019                [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2020def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2021                "// llvm.nvvm.compiler.warn()",
2022                [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2023def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2024                "// llvm.nvvm.compiler.error()",
2025                [(int_nvvm_compiler_error Int32Regs:$a)]>;
2026def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2027                "// llvm.nvvm.compiler.error()",
2028                [(int_nvvm_compiler_error Int64Regs:$a)]>;
2029
2030
2031// isspacep
2032
// Address-space membership tests.  Each def maps int_nvvm_isspacep_<space>
// to "isspacep.<space>", writing a predicate to an Int1Regs destination.
// Every space has a 32-bit and a 64-bit address variant.  Only the const
// variants carry a predicate (hasPTX31); global, local and shared are
// unconditional.
2033def ISSPACEP_CONST_32
2034  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2035              "isspacep.const \t$d, $a;",
2036              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2037    Requires<[hasPTX31]>;
2038def ISSPACEP_CONST_64
2039  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2040              "isspacep.const \t$d, $a;",
2041              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2042    Requires<[hasPTX31]>;
2043def ISSPACEP_GLOBAL_32
2044  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2045              "isspacep.global \t$d, $a;",
2046              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2047def ISSPACEP_GLOBAL_64
2048  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2049              "isspacep.global \t$d, $a;",
2050              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2051def ISSPACEP_LOCAL_32
2052  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2053              "isspacep.local \t$d, $a;",
2054              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2055def ISSPACEP_LOCAL_64
2056  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2057              "isspacep.local \t$d, $a;",
2058              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2059def ISSPACEP_SHARED_32
2060  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2061              "isspacep.shared \t$d, $a;",
2062              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2063def ISSPACEP_SHARED_64
2064  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2065              "isspacep.shared \t$d, $a;",
2066              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2067
2068
2069// Special register reads
// MOV_SPECIAL copies a SpecialRegs operand into a 32-bit register with
// mov.b32.  It has no selection pattern of its own; the anonymous Pats below
// select it for each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31),
// passing the matching ENVREG<N> register as the operand.
2070def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2071                            (ins SpecialRegs:$r),
2072                            "mov.b32 \t$d, $r;", []>;
2073
2074def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2075def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2076def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2077def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2078def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2079def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2080def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2081def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2082def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2083def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2084def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2085def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2086def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2087def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2088def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2089def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2090def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2091def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2092def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2093def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2094def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2095def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2096def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2097def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2098def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2099def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2100def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2101def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2102def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2103def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2104def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2105def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2106
2107
2108// rotate builtin support
2109
// Hardware 32-bit rotate, selected for int_nvvm_rotate_b32 when hasHWROT32
// holds.  It is emitted as a funnel shift (shf.l.wrap.b32) with $src supplied
// as both data operands.  The _IMM form takes an immediate amount, the _REG
// form a register amount.
2110def ROTATE_B32_HW_IMM
2111  : NVPTXInst<(outs Int32Regs:$dst),
2112              (ins  Int32Regs:$src, i32imm:$amt),
2113              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2114              [(set Int32Regs:$dst,
2115                 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2116              Requires<[hasHWROT32]> ;
2117
2118def ROTATE_B32_HW_REG
2119  : NVPTXInst<(outs Int32Regs:$dst),
2120              (ins  Int32Regs:$src, Int32Regs:$amt),
2121              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2122              [(set Int32Regs:$dst,
2123                 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2124              Requires<[hasHWROT32]> ;
2125
// Software fallback for int_nvvm_rotate_b32, used when noHWROT32 holds.
// ROT32imm_sw / ROTL32reg_sw are defined outside this excerpt.  The immediate
// form receives both the amount and (SUB_FRM_32 amt).
// NOTE(review): SUB_FRM_32 presumably yields 32 - amt, the complementary
// shift -- confirm against its definition.
2126def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2127          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2128      Requires<[noHWROT32]> ;
2129
2130def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2131          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2132      Requires<[noHWROT32]> ;
2133
// Split a 64-bit register into one of its 32-bit halves.  Both instructions
// unpack $src with a vector-destination mov.b64 inside a braced scope that
// declares a scratch register %dummy for the half that is discarded:
// GET_LO_INT64 keeps the first element of the pair, GET_HI_INT64 the second.
// Neither has a selection pattern; they are only used from other patterns.
let hasSideEffects = 0 in {
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []>;

  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []>;
}
2149
// PACK_TWO_INT32 builds a 64-bit register from two 32-bit registers with a
// vector-source mov.b64; operand order is ($lo, $hi).  It has no pattern of
// its own.  The Pat below implements int_nvvm_swap_lo_hi_b64 by feeding the
// high half of $src as $lo and the low half as $hi.
2150let hasSideEffects = 0 in {
2151  def PACK_TWO_INT32
2152    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2153                "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2154}
2155
2156def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2157          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2158                          (GET_LO_INT64 Int64Regs:$src))> ;
2159
2160// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2161// no side effects.
// Pattern-less funnel-shift instructions, all gated on hasHWROT32.
// Operands are ($lo, $hi, $amt) and are printed in that order after $dst.
//   SHF_L_WRAP_B32_* -> shf.l.wrap.b32     SHF_R_WRAP_B32_* -> shf.r.wrap.b32
//   *_IMM takes an i32 immediate amount, *_REG a register amount.
2162let hasSideEffects = 0 in {
2163  def SHF_L_WRAP_B32_IMM
2164    : NVPTXInst<(outs Int32Regs:$dst),
2165                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2166                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2167      Requires<[hasHWROT32]>;
2168
2169  def SHF_L_WRAP_B32_REG
2170    : NVPTXInst<(outs Int32Regs:$dst),
2171                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2172                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2173      Requires<[hasHWROT32]>;
2174
2175  def SHF_R_WRAP_B32_IMM
2176    : NVPTXInst<(outs Int32Regs:$dst),
2177                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2178                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2179      Requires<[hasHWROT32]>;
2180
2181  def SHF_R_WRAP_B32_REG
2182    : NVPTXInst<(outs Int32Regs:$dst),
2183                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2184                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2185      Requires<[hasHWROT32]>;
2186}
2187
2188// HW version of rotate 64
// 64-bit rotates built from two 32-bit funnel shifts when hasHWROT32 holds.
// $src is split with GET_LO_INT64 / GET_HI_INT64, each result half is one
// funnel shift over the two halves, and PACK_TWO_INT32 ($lo, $hi) reassembles
// the 64-bit value.
//   rotate left : new lo = shf.l(hi, lo, amt), new hi = shf.l(lo, hi, amt)
//   rotate right: new lo = shf.r(lo, hi, amt), new hi = shf.r(hi, lo, amt)
// (arguments listed in the instructions' ($lo, $hi, $amt) operand order).
// Each direction has an immediate-amount and a register-amount pattern.
2189def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2190          (PACK_TWO_INT32
2191            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2192                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2193            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2194                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2195      Requires<[hasHWROT32]>;
2196
2197def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2198          (PACK_TWO_INT32
2199            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2200                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2201            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2202                               (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2203      Requires<[hasHWROT32]>;
2204
2205
2206def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2207          (PACK_TWO_INT32
2208            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2209                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2210            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2211                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2212      Requires<[hasHWROT32]>;
2213
2214def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2215          (PACK_TWO_INT32
2216            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2217                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2218            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2219                               (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2220      Requires<[hasHWROT32]>;
2221
// SW version of rotate 64
//
// Used when noHWROT32 holds.  ROT64imm_sw, ROTL64reg_sw and ROTR64reg_sw are
// defined outside this excerpt.  ROT64imm_sw takes the source plus two
// immediate shift amounts; for a 64-bit rotate these must be complementary
// with respect to 64, so both immediate patterns derive the second amount
// with SUB_FRM_64.  (SUB_FRM_32 is the 32-bit counterpart used by the
// int_nvvm_rotate_b32 pattern and would produce a wrong complementary shift
// here.)
//   rotate left  by amt: (amt, 64 - amt)
//   rotate right by amt: (64 - amt, amt)
// NOTE(review): assumes SUB_FRM_64 yields 64 - amt -- confirm against its
// definition.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2235
2236
2237//-----------------------------------
2238// Texture Intrinsics
2239//-----------------------------------
2240
2241// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2242// also defined in NVPTXReplaceImageHandles.cpp
2243
2244// texmode_independent
2245let IsTex = 1, IsTexModeUnified = 0 in {
2246// Texture fetch instructions using handles
// 1D texture fetches.  Naming: TEX_1D_<result type>_<coordinate type>
// [_LEVEL | _GRAD].  Every def returns four 32-bit components ($r, $g, $b,
// $a) and takes a texture handle $t, a sampler handle $s and one coordinate
// $x.  _LEVEL variants add an explicit $lod operand (tex.level); _GRAD
// variants add $gradx / $grady (tex.grad).  S32 and U32 results both live in
// Int32Regs and differ only in the type suffix of the mnemonic.  All pattern
// lists are empty, so these are not selected by TableGen patterns.
2247def TEX_1D_F32_S32
2248  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2249                    Float32Regs:$b, Float32Regs:$a),
2250              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2251              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2252              []>;
2253def TEX_1D_F32_F32
2254  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2255                    Float32Regs:$b, Float32Regs:$a),
2256              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2257              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2258              []>;
2259def TEX_1D_F32_F32_LEVEL
2260  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2261                    Float32Regs:$b, Float32Regs:$a),
2262              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
2263              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2264              "[$t, $s, \\{$x\\}], $lod;",
2265              []>;
2266def TEX_1D_F32_F32_GRAD
2267  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2268                    Float32Regs:$b, Float32Regs:$a),
2269              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2270                   Float32Regs:$gradx, Float32Regs:$grady),
2271              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2272              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2273              []>;
2274def TEX_1D_S32_S32
2275  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2276                    Int32Regs:$b, Int32Regs:$a),
2277              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2278              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2279              []>;
2280def TEX_1D_S32_F32
2281  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2282                    Int32Regs:$b, Int32Regs:$a),
2283              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2284              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2285              []>;
2286def TEX_1D_S32_F32_LEVEL
2287  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2288                    Int32Regs:$b, Int32Regs:$a),
2289              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2290                   Float32Regs:$lod),
2291              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2292              "[$t, $s, \\{$x\\}], $lod;",
2293              []>;
2294def TEX_1D_S32_F32_GRAD
2295  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2296                    Int32Regs:$b, Int32Regs:$a),
2297              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2298                   Float32Regs:$gradx, Float32Regs:$grady),
2299              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2300              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2301              []>;
2302def TEX_1D_U32_S32
2303  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2304                    Int32Regs:$b, Int32Regs:$a),
2305              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
2306              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2307              []>;
2308def TEX_1D_U32_F32
2309  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2310                    Int32Regs:$b, Int32Regs:$a),
2311              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
2312              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2313              []>;
2314def TEX_1D_U32_F32_LEVEL
2315  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2316                    Int32Regs:$b, Int32Regs:$a),
2317              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2318                   Float32Regs:$lod),
2319              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2320              "[$t, $s, \\{$x\\}], $lod;",
2321              []>;
2322def TEX_1D_U32_F32_GRAD
2323  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2324                    Int32Regs:$b, Int32Regs:$a),
2325              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
2326                   Float32Regs:$gradx, Float32Regs:$grady),
2327              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2328              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
2329              []>;
2330
// 1D array texture fetches (tex.a1d).  Same layout as the plain 1D forms with
// one extra Int32Regs operand $l, which is printed as the first element of
// the coordinate vector: {$l, $x}.  $l is always an integer register, even
// when the coordinate $x is a float.  _LEVEL adds $lod; _GRAD adds $gradx /
// $grady.  All pattern lists are empty.
2331def TEX_1D_ARRAY_F32_S32
2332  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2333                    Float32Regs:$b, Float32Regs:$a),
2334              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2335              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2336              "[$t, $s, \\{$l, $x\\}];",
2337              []>;
2338def TEX_1D_ARRAY_F32_F32
2339  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2340                    Float32Regs:$b, Float32Regs:$a),
2341              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2342              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2343              "[$t, $s, \\{$l, $x\\}];",
2344              []>;
2345def TEX_1D_ARRAY_F32_F32_LEVEL
2346  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2347                    Float32Regs:$b, Float32Regs:$a),
2348              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2349                   Float32Regs:$lod),
2350              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2351              "[$t, $s, \\{$l, $x\\}], $lod;",
2352              []>;
2353def TEX_1D_ARRAY_F32_F32_GRAD
2354  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2355                    Float32Regs:$b, Float32Regs:$a),
2356              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2357                   Float32Regs:$gradx, Float32Regs:$grady),
2358              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2359              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2360              []>;
2361def TEX_1D_ARRAY_S32_S32
2362  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2363                    Int32Regs:$b, Int32Regs:$a),
2364              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2365              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2366              "[$t, $s, \\{$l, $x\\}];",
2367              []>;
2368def TEX_1D_ARRAY_S32_F32
2369  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2370                    Int32Regs:$b, Int32Regs:$a),
2371              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2372              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2373              "[$t, $s, \\{$l, $x\\}];",
2374              []>;
2375def TEX_1D_ARRAY_S32_F32_LEVEL
2376  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2377                    Int32Regs:$b, Int32Regs:$a),
2378              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2379                   Float32Regs:$lod),
2380              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2381              "[$t, $s, \\{$l, $x\\}], $lod;",
2382              []>;
2383def TEX_1D_ARRAY_S32_F32_GRAD
2384  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2385                    Int32Regs:$b, Int32Regs:$a),
2386              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2387                   Float32Regs:$gradx, Float32Regs:$grady),
2388              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2389              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2390              []>;
2391def TEX_1D_ARRAY_U32_S32
2392  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2393                    Int32Regs:$b, Int32Regs:$a),
2394              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
2395              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2396              "[$t, $s, \\{$l, $x\\}];",
2397              []>;
2398def TEX_1D_ARRAY_U32_F32
2399  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2400                    Int32Regs:$b, Int32Regs:$a),
2401              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
2402              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2403              "[$t, $s, \\{$l, $x\\}];",
2404              []>;
2405def TEX_1D_ARRAY_U32_F32_LEVEL
2406  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2407                    Int32Regs:$b, Int32Regs:$a),
2408              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2409                   Float32Regs:$lod),
2410              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2411              "[$t, $s, \\{$l, $x\\}], $lod;",
2412              []>;
2413def TEX_1D_ARRAY_U32_F32_GRAD
2414  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2415                    Int32Regs:$b, Int32Regs:$a),
2416              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2417                   Float32Regs:$gradx, Float32Regs:$grady),
2418              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2419              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
2420              []>;
2421
// 2D texture fetches (tex.2d).  Coordinates are the pair {$x, $y}.  _LEVEL
// adds a scalar $lod; _GRAD adds two two-element gradient vectors,
// {$gradx0, $gradx1} and {$grady0, $grady1}.  Result and coordinate typing
// follows the TEX_2D_<result>_<coord> name; S32 and U32 results both use
// Int32Regs.  All pattern lists are empty.
2422def TEX_2D_F32_S32
2423  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2424                    Float32Regs:$b, Float32Regs:$a),
2425              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2426              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2427              "[$t, $s, \\{$x, $y\\}];",
2428              []>;
2429def TEX_2D_F32_F32
2430  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2431                    Float32Regs:$b, Float32Regs:$a),
2432              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2433              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2434              "[$t, $s, \\{$x, $y\\}];",
2435              []>;
2436def TEX_2D_F32_F32_LEVEL
2437  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2438                    Float32Regs:$b, Float32Regs:$a),
2439              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2440                   Float32Regs:$lod),
2441              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2442              "[$t, $s, \\{$x, $y\\}], $lod;",
2443              []>;
2444def TEX_2D_F32_F32_GRAD
2445  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2446                    Float32Regs:$b, Float32Regs:$a),
2447              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2448                   Float32Regs:$gradx0, Float32Regs:$gradx1,
2449                   Float32Regs:$grady0, Float32Regs:$grady1),
2450              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2451              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2452              "\\{$grady0, $grady1\\};",
2453              []>;
2454def TEX_2D_S32_S32
2455  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2456                    Int32Regs:$b, Int32Regs:$a),
2457              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2458              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
2459              "[$t, $s, \\{$x, $y\\}];",
2460              []>;
2461def TEX_2D_S32_F32
2462  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2463                    Int32Regs:$b, Int32Regs:$a),
2464              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2465              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2466              "[$t, $s, \\{$x, $y\\}];",
2467              []>;
2468def TEX_2D_S32_F32_LEVEL
2469  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2470                    Int32Regs:$b, Int32Regs:$a),
2471              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2472                   Float32Regs:$lod),
2473              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2474              "[$t, $s, \\{$x, $y\\}], $lod;",
2475              []>;
2476def TEX_2D_S32_F32_GRAD
2477  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2478                    Int32Regs:$b, Int32Regs:$a),
2479              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2480                   Float32Regs:$gradx0, Float32Regs:$gradx1,
2481                   Float32Regs:$grady0, Float32Regs:$grady1),
2482              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
2483              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2484              "\\{$grady0, $grady1\\};",
2485              []>;
2486def TEX_2D_U32_S32
2487  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2488                    Int32Regs:$b, Int32Regs:$a),
2489              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
2490              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
2491              "[$t, $s, \\{$x, $y\\}];",
2492              []>;
2493def TEX_2D_U32_F32
2494  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2495                    Int32Regs:$b, Int32Regs:$a),
2496              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
2497              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2498              "[$t, $s, \\{$x, $y\\}];",
2499              []>;
2500def TEX_2D_U32_F32_LEVEL
2501  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2502                    Int32Regs:$b, Int32Regs:$a),
2503              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2504                   Float32Regs:$lod),
2505              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2506              "[$t, $s, \\{$x, $y\\}], $lod;",
2507              []>;
2508def TEX_2D_U32_F32_GRAD
2509  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
2510                    Int32Regs:$b, Int32Regs:$a),
2511              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
2512                   Float32Regs:$gradx0, Float32Regs:$gradx1,
2513                   Float32Regs:$grady0, Float32Regs:$grady1),
2514              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
2515              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
2516              "\\{$grady0, $grady1\\};",
2517              []>;
2518
2519def TEX_2D_ARRAY_F32_S32
2520  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2521                    Float32Regs:$b, Float32Regs:$a),
2522              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
2523                   Int32Regs:$y),
2524              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
2525              "[$t, $s, \\{$l, $x, $y, $y\\}];",
2526              []>;
2527def TEX_2D_ARRAY_F32_F32
2528  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2529                    Float32Regs:$b, Float32Regs:$a),
2530              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2531                   Float32Regs:$y),
2532              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2533              "[$t, $s, \\{$l, $x, $y, $y\\}];",
2534              []>;
2535def TEX_2D_ARRAY_F32_F32_LEVEL
2536  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
2537                    Float32Regs:$b, Float32Regs:$a),
2538              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
2539                   Float32Regs:$y, Float32Regs:$lod),
2540              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
2541              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2542              []>;
// tex.grad.a2d.v4.f32.f32: the {$l, $x, $y, $y} vector (repeated $y) is
// followed by the vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.a2d.v4.s32.s32: four Int32Regs results from Int32Regs operands.
// The operand vector {$l, $x, $y, $y} repeats $y to fill the fourth slot.
def TEX_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// tex.a2d.v4.s32.f32: four Int32Regs results; $l is Int32Regs while $x/$y
// are Float32Regs. The vector {$l, $x, $y, $y} repeats $y in the last slot.
def TEX_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// tex.level.a2d.v4.s32.f32: the {$l, $x, $y, $y} vector (repeated $y) is
// followed by the trailing $lod operand.
def TEX_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// tex.grad.a2d.v4.s32.f32: the {$l, $x, $y, $y} vector (repeated $y) is
// followed by the vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// tex.a2d.v4.u32.s32: four Int32Regs results from Int32Regs operands.
// The operand vector {$l, $x, $y, $y} repeats $y to fill the fourth slot.
def TEX_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// tex.a2d.v4.u32.f32: four Int32Regs results; $l is Int32Regs while $x/$y
// are Float32Regs. The vector {$l, $x, $y, $y} repeats $y in the last slot.
def TEX_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}];",
    []>;
// tex.level.a2d.v4.u32.f32: the {$l, $x, $y, $y} vector (repeated $y) is
// followed by the trailing $lod operand.
def TEX_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// tex.grad.a2d.v4.u32.f32: the {$l, $x, $y, $y} vector (repeated $y) is
// followed by the vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
2623
// tex.3d.v4.f32.s32: four Float32Regs results from Int32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.3d.v4.f32.f32: four Float32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.3d.v4.f32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.grad.3d.v4.f32.f32: every emitted vector has four elements; the last
// one is a repeat ($z, $gradx2 and $grady2 respectively).
def TEX_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// tex.3d.v4.s32.s32: four Int32Regs results from Int32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.3d.v4.s32.f32: four Int32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.3d.v4.s32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.grad.3d.v4.s32.f32: every emitted vector has four elements; the last
// one is a repeat ($z, $gradx2 and $grady2 respectively).
def TEX_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// tex.3d.v4.u32.s32: four Int32Regs results from Int32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.3d.v4.u32.f32: four Int32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.3d.v4.u32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.grad.3d.v4.u32.f32: every emitted vector has four elements; the last
// one is a repeat ($z, $gradx2 and $grady2 respectively).
def TEX_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
2735
// tex.cube.v4.f32.f32: four Float32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.cube.v4.f32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.cube.v4.s32.f32: four Int32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.cube.v4.s32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// tex.cube.v4.u32.f32: four Int32Regs results from Float32Regs $x/$y/$z.
// The operand vector {$x, $y, $z, $z} repeats $z to fill the fourth slot.
def TEX_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}];",
    []>;
// tex.level.cube.v4.u32.f32: the {$x, $y, $z, $z} vector (repeated $z) is
// followed by the trailing $lod operand.
def TEX_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
2787
// tex.acube.v4.f32.f32: four Float32Regs results. The operand vector is
// {$l, $x, $y, $z}: Int32Regs $l first, then Float32Regs $x/$y/$z.
def TEX_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// tex.level.acube.v4.f32.f32: the {$l, $x, $y, $z} vector is followed by the
// trailing $lod operand.
def TEX_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// tex.acube.v4.s32.f32: four Int32Regs results. The operand vector is
// {$l, $x, $y, $z}: Int32Regs $l first, then Float32Regs $x/$y/$z.
def TEX_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// tex.level.acube.v4.s32.f32: the {$l, $x, $y, $z} vector is followed by the
// trailing $lod operand.
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// tex.acube.v4.u32.f32: four Int32Regs results. The operand vector is
// {$l, $x, $y, $z}: Int32Regs $l first, then Float32Regs $x/$y/$z.
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// tex.level.acube.v4.u32.f32: the {$l, $x, $y, $z} vector is followed by the
// trailing $lod operand.
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
2839
// tld4.r.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.g.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.b.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.a.2d.v4.f32.f32: four Float32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.r.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.g.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.b.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.a.2d.v4.s32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.r.2d.v4.u32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.g.2d.v4.u32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.b.2d.v4.u32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
// tld4.a.2d.v4.u32.f32: four Int32Regs results ($v0..$v3) read through
// [$t, $s, {$x, $y}] with Float32Regs $x/$y.
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s,
         Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
2924}
2925
2926
2927// texmode_unified
2928let IsTex = 1, IsTexModeUnified = 1 in {
2929// Texture fetch instructions using handles
// Unified-mode tex.1d.v4.f32.s32: four Float32Regs results; the address
// operand is [$t, {$x}] with an Int32Regs $x and no $s operand.
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.1d.v4.f32.f32: four Float32Regs results; the address
// operand is [$t, {$x}] with a Float32Regs $x and no $s operand.
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.level.1d.v4.f32.f32: address operand [$t, {$x}] followed
// by the trailing $lod operand.
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
// Unified-mode tex.grad.1d.v4.f32.f32: address operand [$t, {$x}] followed
// by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// Unified-mode tex.1d.v4.s32.s32: four Int32Regs results; the address
// operand is [$t, {$x}] with an Int32Regs $x and no $s operand.
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.1d.v4.s32.f32: four Int32Regs results; the address
// operand is [$t, {$x}] with a Float32Regs $x and no $s operand.
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.level.1d.v4.s32.f32: address operand [$t, {$x}] followed
// by the trailing $lod operand.
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
// Unified-mode tex.grad.1d.v4.s32.f32: address operand [$t, {$x}] followed
// by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// Unified-mode tex.1d.v4.u32.s32: four Int32Regs results; the address
// operand is [$t, {$x}] with an Int32Regs $x and no $s operand.
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.1d.v4.u32.f32: four Int32Regs results; the address
// operand is [$t, {$x}] with a Float32Regs $x and no $s operand.
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
// Unified-mode tex.level.1d.v4.u32.f32: address operand [$t, {$x}] followed
// by the trailing $lod operand.
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
// Unified-mode tex.grad.1d.v4.u32.f32: address operand [$t, {$x}] followed
// by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
3013
// Unified-mode tex.a1d.v4.f32.s32: four Float32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and $x.
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.a1d.v4.f32.f32: four Float32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and Float32Regs $x.
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.level.a1d.v4.f32.f32: address operand [$t, {$l, $x}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
// Unified-mode tex.grad.a1d.v4.f32.f32: address operand [$t, {$l, $x}]
// followed by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// Unified-mode tex.a1d.v4.s32.s32: four Int32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and $x.
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.a1d.v4.s32.f32: four Int32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and Float32Regs $x.
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.level.a1d.v4.s32.f32: address operand [$t, {$l, $x}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
// Unified-mode tex.grad.a1d.v4.s32.f32: address operand [$t, {$l, $x}]
// followed by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
// Unified-mode tex.a1d.v4.u32.s32: four Int32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and $x.
def TEX_UNIFIED_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.a1d.v4.u32.f32: four Int32Regs results; the address
// operand is [$t, {$l, $x}] with Int32Regs $l and Float32Regs $x.
def TEX_UNIFIED_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
// Unified-mode tex.level.a1d.v4.u32.f32: address operand [$t, {$l, $x}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$l, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
// Unified-mode tex.grad.a1d.v4.u32.f32: address operand [$t, {$l, $x}]
// followed by the one-element vectors {$gradx} and {$grady}.
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
3104
// Unified-mode tex.2d.v4.f32.s32: four Float32Regs results; the address
// operand is [$t, {$x, $y}] with Int32Regs $x/$y.
def TEX_UNIFIED_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.2d.v4.f32.f32: four Float32Regs results; the address
// operand is [$t, {$x, $y}] with Float32Regs $x/$y.
def TEX_UNIFIED_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.level.2d.v4.f32.f32: address operand [$t, {$x, $y}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
// Unified-mode tex.grad.2d.v4.f32.f32: address operand [$t, {$x, $y}]
// followed by the vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_UNIFIED_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// Unified-mode tex.2d.v4.s32.s32: four Int32Regs results; the address
// operand is [$t, {$x, $y}] with Int32Regs $x/$y.
def TEX_UNIFIED_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.2d.v4.s32.f32: four Int32Regs results; the address
// operand is [$t, {$x, $y}] with Float32Regs $x/$y.
def TEX_UNIFIED_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.level.2d.v4.s32.f32: address operand [$t, {$x, $y}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
// Unified-mode tex.grad.2d.v4.s32.f32: address operand [$t, {$x, $y}]
// followed by the vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
def TEX_UNIFIED_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// Unified-mode tex.2d.v4.u32.s32: four Int32Regs results; the address
// operand is [$t, {$x, $y}] with Int32Regs $x/$y.
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.2d.v4.u32.f32: four Int32Regs results; the address
// operand is [$t, {$x, $y}] with Float32Regs $x/$y.
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Unified-mode tex.level.2d.v4.u32.f32: address operand [$t, {$x, $y}]
// followed by the trailing $lod operand.
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
// Prints "tex.grad.2d.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, and two Float32Regs pairs ($gradx0/1, $grady0/1) that
// are each printed as a braced two-element vector.
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
3201
// Prints "tex.a2d.v4.f32.s32": Float32Regs results; inputs are $t (Int64Regs)
// and $l, $x, $y (Int32Regs). The printed vector has four slots, so $y is
// emitted twice.
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.f32.f32": Float32Regs results; inputs are $t (Int64Regs),
// $l (Int32Regs) and $x, $y (Float32Regs). The printed vector has four slots,
// so $y is emitted twice.
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and a trailing $lod (Float32Regs).
// $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and the $gradx0/1, $grady0/1 pairs
// (Float32Regs). $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// Prints "tex.a2d.v4.s32.s32": Int32Regs results; inputs are $t (Int64Regs)
// and $l, $x, $y (Int32Regs). The printed vector has four slots, so $y is
// emitted twice.
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.s32.f32": Int32Regs results; inputs are $t (Int64Regs),
// $l (Int32Regs) and $x, $y (Float32Regs). The printed vector has four slots,
// so $y is emitted twice.
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and a trailing $lod (Float32Regs).
// $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and the $gradx0/1, $grady0/1 pairs
// (Float32Regs). $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
// Prints "tex.a2d.v4.u32.s32": Int32Regs results; inputs are $t (Int64Regs)
// and $l, $x, $y (Int32Regs). The printed vector has four slots, so $y is
// emitted twice.
def TEX_UNIFIED_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Int32Regs:$x, Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.a2d.v4.u32.f32": Int32Regs results; inputs are $t (Int64Regs),
// $l (Int32Regs) and $x, $y (Float32Regs). The printed vector has four slots,
// so $y is emitted twice.
def TEX_UNIFIED_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
// Prints "tex.level.a2d.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and a trailing $lod (Float32Regs).
// $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
// Prints "tex.grad.a2d.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y and the $gradx0/1, $grady0/1 pairs
// (Float32Regs). $y is emitted twice to fill the four-slot vector.
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
3306
// Prints "tex.3d.v4.f32.s32": Float32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Int32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.f32.f32": Float32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Float32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and the $gradx0..2, $grady0..2 triples
// (Float32Regs). Each printed vector has four slots, so the last element
// ($z, $gradx2, $grady2) is emitted twice.
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// Prints "tex.3d.v4.s32.s32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Int32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.s32.f32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Float32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and the $gradx0..2, $grady0..2 triples
// (Float32Regs). Each printed vector has four slots, so the last element
// ($z, $gradx2, $grady2) is emitted twice.
def TEX_UNIFIED_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
// Prints "tex.3d.v4.u32.s32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Int32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.3d.v4.u32.f32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Float32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.3d.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.grad.3d.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and the $gradx0..2, $grady0..2 triples
// (Float32Regs). Each printed vector has four slots, so the last element
// ($z, $gradx2, $grady2) is emitted twice.
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1, Float32Regs:$gradx2,
         Float32Regs:$grady0, Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
3418
// Prints "tex.cube.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs) and $x, $y, $z (Float32Regs). The printed vector has four
// slots, so $z is emitted twice.
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.cube.v4.s32.f32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Float32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
// Prints "tex.cube.v4.u32.f32": Int32Regs results; inputs are $t (Int64Regs)
// and $x, $y, $z (Float32Regs). The printed vector has four slots, so $z is
// emitted twice.
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
// Prints "tex.level.cube.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $x, $y, $z and a trailing $lod (Float32Regs). $z is emitted
// twice to fill the four-slot vector.
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
3470
// Prints "tex.acube.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs), printed as the
// four-element vector {$l, $x, $y, $z}.
def TEX_UNIFIED_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.f32.f32": Float32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y, $z and a trailing $lod (Float32Regs).
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// Prints "tex.acube.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs), printed as the
// four-element vector {$l, $x, $y, $z}.
def TEX_UNIFIED_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.s32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y, $z and a trailing $lod (Float32Regs).
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
// Prints "tex.acube.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs) and $x, $y, $z (Float32Regs), printed as the
// four-element vector {$l, $x, $y, $z}.
def TEX_UNIFIED_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
// Prints "tex.level.acube.v4.u32.f32": Int32Regs results; inputs are $t
// (Int64Regs), $l (Int32Regs), $x, $y, $z and a trailing $lod (Float32Regs).
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
3522
// Prints "tld4.r.2d.v4.f32.f32": results $v0..$v3 in Float32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1,
          Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.f32.f32": results $v0..$v3 in Float32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1,
          Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.f32.f32": results $v0..$v3 in Float32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1,
          Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.f32.f32": results $v0..$v3 in Float32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1,
          Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.r.2d.v4.s32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.s32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.s32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.s32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.r.2d.v4.u32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.g.2d.v4.u32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.b.2d.v4.u32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
// Prints "tld4.a.2d.v4.u32.f32": results $v0..$v3 in Int32Regs; inputs are
// $t (Int64Regs) and $x, $y (Float32Regs).
def TLD4_UNIFIED_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
3607}
3608
3609
3610
3611//=== Surface load instructions
3612// .clamp variant
let IsSuld = 1 in {
  // Single-result "suld.b.<geom>.b<N>.clamp" records; every def in this scope
  // gets IsSuld = 1. Each takes $s (Int64Regs) plus Int32Regs index operands.
  // The b8 and b16 forms both write an Int16Regs result; b32 uses Int32Regs
  // and b64 uses Int64Regs.

  // 1D: index vector {$x}.
  def SULD_1D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: index vector {$l, $x}.
  def SULD_1D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: index vector {$x, $y}.
  def SULD_2D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: four-slot index vector {$l, $x, $y, $y}; $y is emitted twice.
  def SULD_2D_ARRAY_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: four-slot index vector {$x, $y, $z, $z}; $z is emitted twice.
  def SULD_3D_I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
3719
let IsSuld = 2 in {
  // Two-result "suld.b.<geom>.v2.b<N>.clamp" records; every def in this scope
  // gets IsSuld = 2. Each writes $r and $g and takes $s (Int64Regs) plus
  // Int32Regs index operands. The b8 and b16 forms both use Int16Regs
  // results; b32 uses Int32Regs and b64 uses Int64Regs.

  // 1D: index vector {$x}.
  def SULD_1D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;
  def SULD_1D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x),
      "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
      []>;

  // 1D array: index vector {$l, $x}.
  def SULD_1D_ARRAY_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;
  def SULD_1D_ARRAY_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
      "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
      []>;

  // 2D: index vector {$x, $y}.
  def SULD_2D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;
  def SULD_2D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
      "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
      []>;

  // 2D array: four-slot index vector {$l, $x, $y, $y}; $y is emitted twice.
  def SULD_2D_ARRAY_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;
  def SULD_2D_ARRAY_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
      "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
      "[$s, \\{$l, $x, $y, $y\\}];",
      []>;

  // 3D: four-slot index vector {$x, $y, $z, $z}; $z is emitted twice.
  def SULD_3D_V2I8_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I16_CLAMP : NVPTXInst<
      (outs Int16Regs:$r, Int16Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I32_CLAMP : NVPTXInst<
      (outs Int32Regs:$r, Int32Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
  def SULD_3D_V2I64_CLAMP : NVPTXInst<
      (outs Int64Regs:$r, Int64Regs:$g),
      (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
      "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
      []>;
}
3830
3831let IsSuld = 3 in {
// Prints "suld.b.1d.v4.b8.clamp": four Int16Regs results ($r, $g, $b, $a);
// inputs are $s (Int64Regs) and $x (Int32Regs).
def SULD_1D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
// Prints "suld.b.1d.v4.b16.clamp": four Int16Regs results ($r, $g, $b, $a);
// inputs are $s (Int64Regs) and $x (Int32Regs).
def SULD_1D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
// Prints "suld.b.1d.v4.b32.clamp": four Int32Regs results ($r, $g, $b, $a);
// inputs are $s (Int64Regs) and $x (Int32Regs).
def SULD_1D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
3847
3848def SULD_1D_ARRAY_V4I8_CLAMP
3849  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3850              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3851              "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3852              "[$s, \\{$l, $x\\}];",
3853              []>;
3854def SULD_1D_ARRAY_V4I16_CLAMP
3855  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3856              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3857              "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3858              "[$s, \\{$l, $x\\}];",
3859              []>;
3860def SULD_1D_ARRAY_V4I32_CLAMP
3861  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3862              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3863              "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3864              "[$s, \\{$l, $x\\}];",
3865              []>;
3866
3867def SULD_2D_V4I8_CLAMP
3868  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3869              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3870              "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3871              []>;
3872def SULD_2D_V4I16_CLAMP
3873  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3874              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3875              "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3876              []>;
3877def SULD_2D_V4I32_CLAMP
3878  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3879              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3880              "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3881              []>;
3882
3883def SULD_2D_ARRAY_V4I8_CLAMP
3884  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3885              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3886              "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3887              "[$s, \\{$l, $x, $y, $y\\}];",
3888              []>;
3889def SULD_2D_ARRAY_V4I16_CLAMP
3890  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3891              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3892              "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3893              "[$s, \\{$l, $x, $y, $y\\}];",
3894              []>;
3895def SULD_2D_ARRAY_V4I32_CLAMP
3896  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3897              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3898              "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3899              "[$s, \\{$l, $x, $y, $y\\}];",
3900              []>;
3901
3902
3903def SULD_3D_V4I8_CLAMP
3904  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3905              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3906              "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
3907              "[$s, \\{$x, $y, $z, $z\\}];",
3908              []>;
3909def SULD_3D_V4I16_CLAMP
3910  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
3911              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3912              "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
3913              "[$s, \\{$x, $y, $z, $z\\}];",
3914              []>;
3915def SULD_3D_V4I32_CLAMP
3916  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
3917              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
3918              "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
3919              "[$s, \\{$x, $y, $z, $z\\}];",
3920              []>;
3921}
3922
3923
3924// .trap variant
// Single-result surface loads (suld.b.<geom>.<width>) with the .trap suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16, b32 and b64.
// The b8 records produce their result in Int16Regs, the same register class
// the b16 records use. Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The scalar groups set
// it to 1, while the v2 and v4 groups in this file set it to 2 and 3.
3925let IsSuld = 1 in {
// 1d: one coordinate, $x.
3926def SULD_1D_I8_TRAP
3927  : NVPTXInst<(outs Int16Regs:$r),
3928              (ins Int64Regs:$s, Int32Regs:$x),
3929              "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
3930              []>;
3931def SULD_1D_I16_TRAP
3932  : NVPTXInst<(outs Int16Regs:$r),
3933              (ins Int64Regs:$s, Int32Regs:$x),
3934              "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
3935              []>;
3936def SULD_1D_I32_TRAP
3937  : NVPTXInst<(outs Int32Regs:$r),
3938              (ins Int64Regs:$s, Int32Regs:$x),
3939              "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
3940              []>;
3941def SULD_1D_I64_TRAP
3942  : NVPTXInst<(outs Int64Regs:$r),
3943              (ins Int64Regs:$s, Int32Regs:$x),
3944              "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
3945              []>;
3946
// a1d: $l is printed ahead of $x in the coordinate list.
3947def SULD_1D_ARRAY_I8_TRAP
3948  : NVPTXInst<(outs Int16Regs:$r),
3949              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3950              "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3951              []>;
3952def SULD_1D_ARRAY_I16_TRAP
3953  : NVPTXInst<(outs Int16Regs:$r),
3954              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3955              "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3956              []>;
3957def SULD_1D_ARRAY_I32_TRAP
3958  : NVPTXInst<(outs Int32Regs:$r),
3959              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3960              "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3961              []>;
3962def SULD_1D_ARRAY_I64_TRAP
3963  : NVPTXInst<(outs Int64Regs:$r),
3964              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
3965              "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
3966              []>;
3967
// 2d: coordinates $x, $y.
3968def SULD_2D_I8_TRAP
3969  : NVPTXInst<(outs Int16Regs:$r),
3970              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3971              "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3972              []>;
3973def SULD_2D_I16_TRAP
3974  : NVPTXInst<(outs Int16Regs:$r),
3975              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3976              "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3977              []>;
3978def SULD_2D_I32_TRAP
3979  : NVPTXInst<(outs Int32Regs:$r),
3980              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3981              "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3982              []>;
3983def SULD_2D_I64_TRAP
3984  : NVPTXInst<(outs Int64Regs:$r),
3985              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
3986              "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
3987              []>;
3988
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
3989def SULD_2D_ARRAY_I8_TRAP
3990  : NVPTXInst<(outs Int16Regs:$r),
3991              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3992              "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3993              []>;
3994def SULD_2D_ARRAY_I16_TRAP
3995  : NVPTXInst<(outs Int16Regs:$r),
3996              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
3997              "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3998              []>;
3999def SULD_2D_ARRAY_I32_TRAP
4000  : NVPTXInst<(outs Int32Regs:$r),
4001              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4002              "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4003              []>;
4004def SULD_2D_ARRAY_I64_TRAP
4005  : NVPTXInst<(outs Int64Regs:$r),
4006              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4007              "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4008              []>;
4009
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4010def SULD_3D_I8_TRAP
4011  : NVPTXInst<(outs Int16Regs:$r),
4012              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4013              "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4014              []>;
4015def SULD_3D_I16_TRAP
4016  : NVPTXInst<(outs Int16Regs:$r),
4017              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4018              "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4019              []>;
4020def SULD_3D_I32_TRAP
4021  : NVPTXInst<(outs Int32Regs:$r),
4022              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4023              "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4024              []>;
4025def SULD_3D_I64_TRAP
4026  : NVPTXInst<(outs Int64Regs:$r),
4027              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4028              "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4029              []>;
4030}
4031
// Two-result surface loads (suld.b.<geom>.v2.<width>) with the .trap suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16, b32 and b64.
// The b8 records produce their results in Int16Regs, the same register class
// the b16 records use. Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The v2 groups set it to
// 2, while the scalar and v4 groups in this file set it to 1 and 3.
4032let IsSuld = 2 in {
// 1d: one coordinate, $x.
4033def SULD_1D_V2I8_TRAP
4034  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4035              (ins Int64Regs:$s, Int32Regs:$x),
4036              "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4037              []>;
4038def SULD_1D_V2I16_TRAP
4039  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4040              (ins Int64Regs:$s, Int32Regs:$x),
4041              "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4042              []>;
4043def SULD_1D_V2I32_TRAP
4044  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4045              (ins Int64Regs:$s, Int32Regs:$x),
4046              "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4047              []>;
4048def SULD_1D_V2I64_TRAP
4049  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4050              (ins Int64Regs:$s, Int32Regs:$x),
4051              "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
4052              []>;
4053
// a1d: $l is printed ahead of $x in the coordinate list.
4054def SULD_1D_ARRAY_V2I8_TRAP
4055  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4056              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4057              "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4058              []>;
4059def SULD_1D_ARRAY_V2I16_TRAP
4060  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4061              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4062              "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4063              []>;
4064def SULD_1D_ARRAY_V2I32_TRAP
4065  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4066              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4067              "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4068              []>;
4069def SULD_1D_ARRAY_V2I64_TRAP
4070  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4071              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4072              "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4073              []>;
4074
// 2d: coordinates $x, $y.
4075def SULD_2D_V2I8_TRAP
4076  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4077              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4078              "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4079              []>;
4080def SULD_2D_V2I16_TRAP
4081  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4082              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4083              "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4084              []>;
4085def SULD_2D_V2I32_TRAP
4086  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4087              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4088              "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4089              []>;
4090def SULD_2D_V2I64_TRAP
4091  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4092              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4093              "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4094              []>;
4095
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
4096def SULD_2D_ARRAY_V2I8_TRAP
4097  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4098              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4099              "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
4100              "[$s, \\{$l, $x, $y, $y\\}];",
4101              []>;
4102def SULD_2D_ARRAY_V2I16_TRAP
4103  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4104              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4105              "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
4106              "[$s, \\{$l, $x, $y, $y\\}];",
4107              []>;
4108def SULD_2D_ARRAY_V2I32_TRAP
4109  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4110              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4111              "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
4112              "[$s, \\{$l, $x, $y, $y\\}];",
4113              []>;
4114def SULD_2D_ARRAY_V2I64_TRAP
4115  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4116              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4117              "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
4118              "[$s, \\{$l, $x, $y, $y\\}];",
4119              []>;
4120
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4121def SULD_3D_V2I8_TRAP
4122  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4123              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4124              "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4125              []>;
4126def SULD_3D_V2I16_TRAP
4127  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4128              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4129              "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4130              []>;
4131def SULD_3D_V2I32_TRAP
4132  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4133              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4134              "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4135              []>;
4136def SULD_3D_V2I64_TRAP
4137  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4138              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4139              "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4140              []>;
4141}
4142
// Four-result surface loads (suld.b.<geom>.v4.<width>) with the .trap suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16 and b32; no b64
// record is defined in this group. The b8 records produce their results in
// Int16Regs, the same register class the b16 records use.
// Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The v4 groups set it to
// 3, while the scalar and v2 groups in this file set it to 1 and 2.
4143let IsSuld = 3 in {
// 1d: one coordinate, $x.
4144def SULD_1D_V4I8_TRAP
4145  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4146              (ins Int64Regs:$s, Int32Regs:$x),
4147              "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4148              []>;
4149def SULD_1D_V4I16_TRAP
4150  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4151              (ins Int64Regs:$s, Int32Regs:$x),
4152              "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4153              []>;
4154def SULD_1D_V4I32_TRAP
4155  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4156              (ins Int64Regs:$s, Int32Regs:$x),
4157              "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4158              []>;
4159
// a1d: $l is printed ahead of $x in the coordinate list.
4160def SULD_1D_ARRAY_V4I8_TRAP
4161  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4162              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4163              "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4164              "[$s, \\{$l, $x\\}];",
4165              []>;
4166def SULD_1D_ARRAY_V4I16_TRAP
4167  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4168              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4169              "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4170              "[$s, \\{$l, $x\\}];",
4171              []>;
4172def SULD_1D_ARRAY_V4I32_TRAP
4173  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4174              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4175              "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4176              "[$s, \\{$l, $x\\}];",
4177              []>;
4178
// 2d: coordinates $x, $y.
4179def SULD_2D_V4I8_TRAP
4180  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4181              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4182              "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4183              []>;
4184def SULD_2D_V4I16_TRAP
4185  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4186              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4187              "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4188              []>;
4189def SULD_2D_V4I32_TRAP
4190  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4191              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4192              "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4193              []>;
4194
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
4195def SULD_2D_ARRAY_V4I8_TRAP
4196  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4197              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4198              "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4199              "[$s, \\{$l, $x, $y, $y\\}];",
4200              []>;
4201def SULD_2D_ARRAY_V4I16_TRAP
4202  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4203              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4204              "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4205              "[$s, \\{$l, $x, $y, $y\\}];",
4206              []>;
4207def SULD_2D_ARRAY_V4I32_TRAP
4208  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4209              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4210              "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4211              "[$s, \\{$l, $x, $y, $y\\}];",
4212              []>;
4213
4214
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4215def SULD_3D_V4I8_TRAP
4216  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4217              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4218              "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
4219              "[$s, \\{$x, $y, $z, $z\\}];",
4220              []>;
4221def SULD_3D_V4I16_TRAP
4222  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4223              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4224              "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
4225              "[$s, \\{$x, $y, $z, $z\\}];",
4226              []>;
4227def SULD_3D_V4I32_TRAP
4228  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4229              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4230              "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
4231              "[$s, \\{$x, $y, $z, $z\\}];",
4232              []>;
4233}
4234
4235// .zero variant
// Single-result surface loads (suld.b.<geom>.<width>) with the .zero suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16, b32 and b64.
// The b8 records produce their result in Int16Regs, the same register class
// the b16 records use. Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The scalar groups set
// it to 1, while the v2 and v4 groups in this file set it to 2 and 3.
4236let IsSuld = 1 in {
// 1d: one coordinate, $x.
4237def SULD_1D_I8_ZERO
4238  : NVPTXInst<(outs Int16Regs:$r),
4239              (ins Int64Regs:$s, Int32Regs:$x),
4240              "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
4241              []>;
4242def SULD_1D_I16_ZERO
4243  : NVPTXInst<(outs Int16Regs:$r),
4244              (ins Int64Regs:$s, Int32Regs:$x),
4245              "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
4246              []>;
4247def SULD_1D_I32_ZERO
4248  : NVPTXInst<(outs Int32Regs:$r),
4249              (ins Int64Regs:$s, Int32Regs:$x),
4250              "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
4251              []>;
4252def SULD_1D_I64_ZERO
4253  : NVPTXInst<(outs Int64Regs:$r),
4254              (ins Int64Regs:$s, Int32Regs:$x),
4255              "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
4256              []>;
4257
// a1d: $l is printed ahead of $x in the coordinate list.
4258def SULD_1D_ARRAY_I8_ZERO
4259  : NVPTXInst<(outs Int16Regs:$r),
4260              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4261              "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4262              []>;
4263def SULD_1D_ARRAY_I16_ZERO
4264  : NVPTXInst<(outs Int16Regs:$r),
4265              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4266              "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4267              []>;
4268def SULD_1D_ARRAY_I32_ZERO
4269  : NVPTXInst<(outs Int32Regs:$r),
4270              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4271              "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4272              []>;
4273def SULD_1D_ARRAY_I64_ZERO
4274  : NVPTXInst<(outs Int64Regs:$r),
4275              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4276              "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
4277              []>;
4278
// 2d: coordinates $x, $y.
4279def SULD_2D_I8_ZERO
4280  : NVPTXInst<(outs Int16Regs:$r),
4281              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4282              "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4283              []>;
4284def SULD_2D_I16_ZERO
4285  : NVPTXInst<(outs Int16Regs:$r),
4286              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4287              "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4288              []>;
4289def SULD_2D_I32_ZERO
4290  : NVPTXInst<(outs Int32Regs:$r),
4291              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4292              "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4293              []>;
4294def SULD_2D_I64_ZERO
4295  : NVPTXInst<(outs Int64Regs:$r),
4296              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4297              "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
4298              []>;
4299
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
4300def SULD_2D_ARRAY_I8_ZERO
4301  : NVPTXInst<(outs Int16Regs:$r),
4302              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4303              "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4304              []>;
4305def SULD_2D_ARRAY_I16_ZERO
4306  : NVPTXInst<(outs Int16Regs:$r),
4307              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4308              "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4309              []>;
4310def SULD_2D_ARRAY_I32_ZERO
4311  : NVPTXInst<(outs Int32Regs:$r),
4312              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4313              "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4314              []>;
4315def SULD_2D_ARRAY_I64_ZERO
4316  : NVPTXInst<(outs Int64Regs:$r),
4317              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4318              "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
4319              []>;
4320
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4321def SULD_3D_I8_ZERO
4322  : NVPTXInst<(outs Int16Regs:$r),
4323              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4324              "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4325              []>;
4326def SULD_3D_I16_ZERO
4327  : NVPTXInst<(outs Int16Regs:$r),
4328              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4329              "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4330              []>;
4331def SULD_3D_I32_ZERO
4332  : NVPTXInst<(outs Int32Regs:$r),
4333              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4334              "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4335              []>;
4336def SULD_3D_I64_ZERO
4337  : NVPTXInst<(outs Int64Regs:$r),
4338              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4339              "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
4340              []>;
4341}
4342
// Two-result surface loads (suld.b.<geom>.v2.<width>) with the .zero suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16, b32 and b64.
// The b8 records produce their results in Int16Regs, the same register class
// the b16 records use. Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The v2 groups set it to
// 2, while the scalar and v4 groups in this file set it to 1 and 3.
4343let IsSuld = 2 in {
// 1d: one coordinate, $x.
4344def SULD_1D_V2I8_ZERO
4345  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4346              (ins Int64Regs:$s, Int32Regs:$x),
4347              "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4348              []>;
4349def SULD_1D_V2I16_ZERO
4350  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4351              (ins Int64Regs:$s, Int32Regs:$x),
4352              "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4353              []>;
4354def SULD_1D_V2I32_ZERO
4355  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4356              (ins Int64Regs:$s, Int32Regs:$x),
4357              "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4358              []>;
4359def SULD_1D_V2I64_ZERO
4360  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4361              (ins Int64Regs:$s, Int32Regs:$x),
4362              "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
4363              []>;
4364
// a1d: $l is printed ahead of $x in the coordinate list.
4365def SULD_1D_ARRAY_V2I8_ZERO
4366  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4367              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4368              "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4369              []>;
4370def SULD_1D_ARRAY_V2I16_ZERO
4371  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4372              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4373              "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4374              []>;
4375def SULD_1D_ARRAY_V2I32_ZERO
4376  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4377              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4378              "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4379              []>;
4380def SULD_1D_ARRAY_V2I64_ZERO
4381  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4382              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4383              "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
4384              []>;
4385
// 2d: coordinates $x, $y.
4386def SULD_2D_V2I8_ZERO
4387  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4388              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4389              "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4390              []>;
4391def SULD_2D_V2I16_ZERO
4392  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4393              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4394              "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4395              []>;
4396def SULD_2D_V2I32_ZERO
4397  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4398              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4399              "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4400              []>;
4401def SULD_2D_V2I64_ZERO
4402  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4403              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4404              "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
4405              []>;
4406
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
4407def SULD_2D_ARRAY_V2I8_ZERO
4408  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4409              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4410              "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
4411              "[$s, \\{$l, $x, $y, $y\\}];",
4412              []>;
4413def SULD_2D_ARRAY_V2I16_ZERO
4414  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4415              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4416              "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
4417              "[$s, \\{$l, $x, $y, $y\\}];",
4418              []>;
4419def SULD_2D_ARRAY_V2I32_ZERO
4420  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4421              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4422              "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
4423              "[$s, \\{$l, $x, $y, $y\\}];",
4424              []>;
4425def SULD_2D_ARRAY_V2I64_ZERO
4426  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4427              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4428              "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
4429              "[$s, \\{$l, $x, $y, $y\\}];",
4430              []>;
4431
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4432def SULD_3D_V2I8_ZERO
4433  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4434              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4435              "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4436              []>;
4437def SULD_3D_V2I16_ZERO
4438  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
4439              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4440              "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4441              []>;
4442def SULD_3D_V2I32_ZERO
4443  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
4444              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4445              "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4446              []>;
4447def SULD_3D_V2I64_ZERO
4448  : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
4449              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4450              "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
4451              []>;
4452}
4453
// Four-result surface loads (suld.b.<geom>.v4.<width>) with the .zero suffix.
// Covers the 1d, a1d, 2d, a2d and 3d geometries at b8, b16 and b32; no b64
// record is defined in this group. The b8 records produce their results in
// Int16Regs, the same register class the b16 records use.
// Every record has an empty pattern list.
// The a2d and 3d strings print the last coordinate twice so the brace list has
// four entries -- presumably required by the PTX operand format; TODO confirm.
// NOTE(review): IsSuld is declared outside this chunk. The v4 groups set it to
// 3, while the scalar and v2 groups in this file set it to 1 and 2.
4454let IsSuld = 3 in {
// 1d: one coordinate, $x.
4455def SULD_1D_V4I8_ZERO
4456  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4457              (ins Int64Regs:$s, Int32Regs:$x),
4458              "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4459              []>;
4460def SULD_1D_V4I16_ZERO
4461  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4462              (ins Int64Regs:$s, Int32Regs:$x),
4463              "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4464              []>;
4465def SULD_1D_V4I32_ZERO
4466  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4467              (ins Int64Regs:$s, Int32Regs:$x),
4468              "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
4469              []>;
4470
// a1d: $l is printed ahead of $x in the coordinate list.
4471def SULD_1D_ARRAY_V4I8_ZERO
4472  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4473              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4474              "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4475              "[$s, \\{$l, $x\\}];",
4476              []>;
4477def SULD_1D_ARRAY_V4I16_ZERO
4478  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4479              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4480              "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4481              "[$s, \\{$l, $x\\}];",
4482              []>;
4483def SULD_1D_ARRAY_V4I32_ZERO
4484  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4485              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
4486              "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4487              "[$s, \\{$l, $x\\}];",
4488              []>;
4489
// 2d: coordinates $x, $y.
4490def SULD_2D_V4I8_ZERO
4491  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4492              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4493              "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4494              []>;
4495def SULD_2D_V4I16_ZERO
4496  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4497              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4498              "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4499              []>;
4500def SULD_2D_V4I32_ZERO
4501  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4502              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
4503              "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
4504              []>;
4505
// a2d: $l, $x, $y, with $y printed a second time as the fourth entry.
4506def SULD_2D_ARRAY_V4I8_ZERO
4507  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4508              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4509              "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4510              "[$s, \\{$l, $x, $y, $y\\}];",
4511              []>;
4512def SULD_2D_ARRAY_V4I16_ZERO
4513  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4514              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4515              "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4516              "[$s, \\{$l, $x, $y, $y\\}];",
4517              []>;
4518def SULD_2D_ARRAY_V4I32_ZERO
4519  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4520              (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
4521              "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4522              "[$s, \\{$l, $x, $y, $y\\}];",
4523              []>;
4524
4525
// 3d: $x, $y, $z, with $z printed a second time as the fourth entry.
4526def SULD_3D_V4I8_ZERO
4527  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4528              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4529              "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
4530              "[$s, \\{$x, $y, $z, $z\\}];",
4531              []>;
4532def SULD_3D_V4I16_ZERO
4533  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4534              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4535              "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
4536              "[$s, \\{$x, $y, $z, $z\\}];",
4537              []>;
4538def SULD_3D_V4I32_ZERO
4539  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4540              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
4541              "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
4542              "[$s, \\{$x, $y, $z, $z\\}];",
4543              []>;
4544}
4545
4546//-----------------------------------
4547// Texture Query Intrinsics
4548//-----------------------------------
4549
// Common shape of every txq.* instruction below: an Int32Regs result $d, a
// single Int64Regs operand $a printed in brackets, and an empty pattern list.
// `query` is the mnemonic component between "txq." and ".b32".
class NVPTXTexQueryB32<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq." # query # ".b32 \t$d, [$a];",
              []>;

let IsSurfTexQuery = 1 in {
def TXQ_CHANNEL_ORDER     : NVPTXTexQueryB32<"channel_order">;
def TXQ_CHANNEL_DATA_TYPE : NVPTXTexQueryB32<"channel_data_type">;
def TXQ_WIDTH             : NVPTXTexQueryB32<"width">;
def TXQ_HEIGHT            : NVPTXTexQueryB32<"height">;
def TXQ_DEPTH             : NVPTXTexQueryB32<"depth">;
def TXQ_ARRAY_SIZE        : NVPTXTexQueryB32<"array_size">;
def TXQ_NUM_SAMPLES       : NVPTXTexQueryB32<"num_samples">;
def TXQ_NUM_MIPMAP_LEVELS : NVPTXTexQueryB32<"num_mipmap_levels">;
}
4584
// Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs
// operand is rewritten to the TXQ_* instruction of the same name.
def : Pat<
    (int_nvvm_txq_channel_order Int64Regs:$a),
    (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_channel_data_type Int64Regs:$a),
    (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_width Int64Regs:$a),
    (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_height Int64Regs:$a),
    (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_depth Int64Regs:$a),
    (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_array_size Int64Regs:$a),
    (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_samples Int64Regs:$a),
    (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
    (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
    (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
4601
4602
4603//-----------------------------------
4604// Surface Query Intrinsics
4605//-----------------------------------
4606
// Common shape of every suq.* instruction below: an Int32Regs result $d, a
// single Int64Regs operand $a printed in brackets, and an empty pattern list.
// `query` is the mnemonic component between "suq." and ".b32".
class NVPTXSurfQueryB32<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq." # query # ".b32 \t$d, [$a];",
              []>;

let IsSurfTexQuery = 1 in {
def SUQ_CHANNEL_ORDER     : NVPTXSurfQueryB32<"channel_order">;
def SUQ_CHANNEL_DATA_TYPE : NVPTXSurfQueryB32<"channel_data_type">;
def SUQ_WIDTH             : NVPTXSurfQueryB32<"width">;
def SUQ_HEIGHT            : NVPTXSurfQueryB32<"height">;
def SUQ_DEPTH             : NVPTXSurfQueryB32<"depth">;
def SUQ_ARRAY_SIZE        : NVPTXSurfQueryB32<"array_size">;
}
4633
// Selection patterns: each int_nvvm_suq_* intrinsic applied to an Int64Regs
// operand is rewritten to the SUQ_* instruction of the same name.
def : Pat<
    (int_nvvm_suq_channel_order Int64Regs:$a),
    (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_channel_data_type Int64Regs:$a),
    (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_width Int64Regs:$a),
    (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_height Int64Regs:$a),
    (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_depth Int64Regs:$a),
    (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
    (int_nvvm_suq_array_size Int64Regs:$a),
    (SUQ_ARRAY_SIZE Int64Regs:$a)>;
4646
4647
4648//===- Handle Query -------------------------------------------------------===//
4649
4650// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// istypep.{samplerref,surfref,texref}: each takes an Int64Regs operand $a
// (printed bare, without brackets) and yields an Int1Regs result $d. Unlike
// the query instructions, the selection pattern is attached inline and
// matches the corresponding int_nvvm_istypep_* intrinsic.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d),
    (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4663
4664//===- Surface Stores -----------------------------------------------------===//
4665
4666let IsSust = 1 in {
4667// Unformatted
4668// .clamp variant
// sust.b.1d.*.clamp: no outputs; inputs are $s, the $x coordinate, then one,
// two or four data registers ($r, $g, $b, $a). The b8 and b16 forms both
// take their data in Int16Regs. No selection pattern is attached ([]).
def SUST_B_1D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
4727
4728
// sust.b.a1d.*.clamp: as the 1D forms, plus an Int32Regs operand $idx that
// is printed ahead of $x inside the coordinate braces. The b8 and b16 forms
// both take their data in Int16Regs. No selection pattern is attached ([]).
def SUST_B_1D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
4794
4795
// sust.b.2d.*.clamp: no outputs; inputs are $s, the two Int32Regs
// coordinates $x and $y, then one, two or four data registers. The b8 and
// b16 forms both take their data in Int16Regs. No selection pattern ([]).
def SUST_B_2D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
4861
4862
// sust.b.a2d.*.clamp: inputs are $s, $idx, $x, $y, then the data registers.
// The asm string emits $y twice so that the coordinate vector has four
// elements (presumably padding required by the PTX a2d syntax -- confirm
// against the PTX ISA). The b8 and b16 forms both take data in Int16Regs.
def SUST_B_2D_ARRAY_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
4936
4937
// sust.b.3d.*.clamp: inputs are $s, the three Int32Regs coordinates $x, $y,
// $z, then the data registers. The asm string emits $z twice so that the
// coordinate vector has four elements (presumably padding required by the
// PTX 3d syntax -- confirm against the PTX ISA). The b8 and b16 forms both
// take their data in Int16Regs. No selection pattern is attached ([]).
def SUST_B_3D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r),
    "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r),
    "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r),
    "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
    []>;
def SUST_B_3D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_3D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_3D_V4B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5011
5012
5013// .trap variant
// sust.b.1d.*.trap: same operand layout as the 1D .clamp defs, with the
// ".trap" suffix in the mnemonic. The b8 and b16 forms both take their data
// in Int16Regs. No selection pattern is attached ([]).
def SUST_B_1D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
5072
5073
// sust.b.a1d.*.trap: inputs are $s, $idx, $x, then the data registers; $idx
// is printed ahead of $x inside the coordinate braces. The b8 and b16 forms
// both take their data in Int16Regs. No selection pattern is attached ([]).
def SUST_B_1D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r),
    "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r),
    "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r),
    "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_1D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5139
5140
// sust.b.2d.*.trap: inputs are $s, the two Int32Regs coordinates $x and $y,
// then one, two or four data registers. The b8 and b16 forms both take
// their data in Int16Regs. No selection pattern is attached ([]).
def SUST_B_2D_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
    []>;
def SUST_B_2D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5206
5207
// sust.b.a2d.*.trap: inputs are $s, $idx, $x, $y, then the data registers.
// The asm string emits $y twice so that the coordinate vector has four
// elements (presumably padding required by the PTX a2d syntax -- confirm
// against the PTX ISA). The b8 and b16 forms both take data in Int16Regs.
def SUST_B_2D_ARRAY_B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r),
    "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r),
    "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r),
    "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
    []>;
def SUST_B_2D_ARRAY_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g),
    "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g),
    "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V2B64_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int64Regs:$r, Int64Regs:$g),
    "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g\\};",
    []>;
def SUST_B_2D_ARRAY_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
def SUST_B_2D_ARRAY_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
    "\\{$r, $g, $b, $a\\};",
    []>;
5281
5282
// sust.b.3d.*.trap surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x, $y, $z   - Int32Regs coordinates, printed as {$x, $y, $z, $z}.
//                  NOTE(review): $z is printed twice; this looks like padding
//                  to a four-element coordinate vector -- confirm against the
//                  PTX ISA description of sust.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                  only) uses Int64Regs.
5283def SUST_B_3D_B8_TRAP
5284  : NVPTXInst<(outs),
5285              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5286                   Int16Regs:$r),
5287              "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5288              []>;
5289def SUST_B_3D_B16_TRAP
5290  : NVPTXInst<(outs),
5291              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5292                   Int16Regs:$r),
5293              "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5294              []>;
5295def SUST_B_3D_B32_TRAP
5296  : NVPTXInst<(outs),
5297              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5298                   Int32Regs:$r),
5299              "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5300              []>;
5301def SUST_B_3D_B64_TRAP
5302  : NVPTXInst<(outs),
5303              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5304                   Int64Regs:$r),
5305              "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5306              []>;
5307def SUST_B_3D_V2B8_TRAP
5308  : NVPTXInst<(outs),
5309              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5310                   Int16Regs:$r, Int16Regs:$g),
5311              "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5312              "\\{$r, $g\\};",
5313              []>;
5314def SUST_B_3D_V2B16_TRAP
5315  : NVPTXInst<(outs),
5316              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5317                   Int16Regs:$r, Int16Regs:$g),
5318              "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5319              "\\{$r, $g\\};",
5320              []>;
5321def SUST_B_3D_V2B32_TRAP
5322  : NVPTXInst<(outs),
5323              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5324                   Int32Regs:$r, Int32Regs:$g),
5325              "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5326              "\\{$r, $g\\};",
5327              []>;
5328def SUST_B_3D_V2B64_TRAP
5329  : NVPTXInst<(outs),
5330              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5331                   Int64Regs:$r, Int64Regs:$g),
5332              "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5333              "\\{$r, $g\\};",
5334              []>;
5335def SUST_B_3D_V4B8_TRAP
5336  : NVPTXInst<(outs),
5337              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5338                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5339         "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5340         "\\{$r, $g, $b, $a\\};",
5341              []>;
5342def SUST_B_3D_V4B16_TRAP
5343  : NVPTXInst<(outs),
5344              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5345                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5346        "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5347        "\\{$r, $g, $b, $a\\};",
5348              []>;
5349def SUST_B_3D_V4B32_TRAP
5350  : NVPTXInst<(outs),
5351              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5352                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5353        "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
5354        "\\{$r, $g, $b, $a\\};",
5355              []>;
5356
5357
5358// .zero variant
// sust.b.1d.*.zero surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x           - Int32Regs coordinate, printed as {$x}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                  only) uses Int64Regs.
5359def SUST_B_1D_B8_ZERO
5360  : NVPTXInst<(outs),
5361              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5362              "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
5363              []>;
5364def SUST_B_1D_B16_ZERO
5365  : NVPTXInst<(outs),
5366              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5367              "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
5368              []>;
5369def SUST_B_1D_B32_ZERO
5370  : NVPTXInst<(outs),
5371              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5372              "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
5373              []>;
5374def SUST_B_1D_B64_ZERO
5375  : NVPTXInst<(outs),
5376              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5377              "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
5378              []>;
5379def SUST_B_1D_V2B8_ZERO
5380  : NVPTXInst<(outs),
5381              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5382              "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5383              []>;
5384def SUST_B_1D_V2B16_ZERO
5385  : NVPTXInst<(outs),
5386              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5387              "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5388              []>;
5389def SUST_B_1D_V2B32_ZERO
5390  : NVPTXInst<(outs),
5391              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5392              "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5393              []>;
5394def SUST_B_1D_V2B64_ZERO
5395  : NVPTXInst<(outs),
5396              (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5397              "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
5398              []>;
5399def SUST_B_1D_V4B8_ZERO
5400  : NVPTXInst<(outs),
5401              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5402                   Int16Regs:$b, Int16Regs:$a),
5403              "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5404              []>;
5405def SUST_B_1D_V4B16_ZERO
5406  : NVPTXInst<(outs),
5407              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5408                   Int16Regs:$b, Int16Regs:$a),
5409              "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5410              []>;
5411def SUST_B_1D_V4B32_ZERO
5412  : NVPTXInst<(outs),
5413              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5414                   Int32Regs:$b, Int32Regs:$a),
5415              "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5416              []>;
5417
5418
// sust.b.a1d.*.zero surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $idx, $x     - Int32Regs coordinates, printed as {$idx, $x}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                  only) uses Int64Regs.
5419def SUST_B_1D_ARRAY_B8_ZERO
5420  : NVPTXInst<(outs),
5421              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5422              "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5423              []>;
5424def SUST_B_1D_ARRAY_B16_ZERO
5425  : NVPTXInst<(outs),
5426              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5427              "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5428              []>;
5429def SUST_B_1D_ARRAY_B32_ZERO
5430  : NVPTXInst<(outs),
5431              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5432              "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5433              []>;
5434def SUST_B_1D_ARRAY_B64_ZERO
5435  : NVPTXInst<(outs),
5436              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
5437              "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5438              []>;
5439def SUST_B_1D_ARRAY_V2B8_ZERO
5440  : NVPTXInst<(outs),
5441              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5442                   Int16Regs:$g),
5443              "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5444              []>;
5445def SUST_B_1D_ARRAY_V2B16_ZERO
5446  : NVPTXInst<(outs),
5447              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5448                   Int16Regs:$g),
5449              "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5450              []>;
5451def SUST_B_1D_ARRAY_V2B32_ZERO
5452  : NVPTXInst<(outs),
5453              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5454                   Int32Regs:$g),
5455              "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5456              []>;
5457def SUST_B_1D_ARRAY_V2B64_ZERO
5458  : NVPTXInst<(outs),
5459              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
5460                   Int64Regs:$g),
5461              "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5462              []>;
5463def SUST_B_1D_ARRAY_V4B8_ZERO
5464  : NVPTXInst<(outs),
5465              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5466                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467              "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
5468              "\\{$r, $g, $b, $a\\};",
5469              []>;
5470def SUST_B_1D_ARRAY_V4B16_ZERO
5471  : NVPTXInst<(outs),
5472              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5473                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5474             "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
5475             "\\{$r, $g, $b, $a\\};",
5476              []>;
5477def SUST_B_1D_ARRAY_V4B32_ZERO
5478  : NVPTXInst<(outs),
5479              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5480                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5481             "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
5482             "\\{$r, $g, $b, $a\\};",
5483              []>;
5484
5485
// sust.b.2d.*.zero surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x, $y       - Int32Regs coordinates, printed as {$x, $y}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                  only) uses Int64Regs.
5486def SUST_B_2D_B8_ZERO
5487  : NVPTXInst<(outs),
5488              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5489              "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5490              []>;
5491def SUST_B_2D_B16_ZERO
5492  : NVPTXInst<(outs),
5493              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5494              "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5495              []>;
5496def SUST_B_2D_B32_ZERO
5497  : NVPTXInst<(outs),
5498              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5499              "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5500              []>;
5501def SUST_B_2D_B64_ZERO
5502  : NVPTXInst<(outs),
5503              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5504              "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
5505              []>;
5506def SUST_B_2D_V2B8_ZERO
5507  : NVPTXInst<(outs),
5508              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5509                   Int16Regs:$g),
5510              "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5511              []>;
5512def SUST_B_2D_V2B16_ZERO
5513  : NVPTXInst<(outs),
5514              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5515                   Int16Regs:$g),
5516              "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5517              []>;
5518def SUST_B_2D_V2B32_ZERO
5519  : NVPTXInst<(outs),
5520              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5521                   Int32Regs:$g),
5522              "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5523              []>;
5524def SUST_B_2D_V2B64_ZERO
5525  : NVPTXInst<(outs),
5526              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5527                   Int64Regs:$g),
5528              "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5529              []>;
5530def SUST_B_2D_V4B8_ZERO
5531  : NVPTXInst<(outs),
5532              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5533                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5534              "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
5535              "\\{$r, $g, $b, $a\\};",
5536              []>;
5537def SUST_B_2D_V4B16_ZERO
5538  : NVPTXInst<(outs),
5539              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5540                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5541             "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
5542             "\\{$r, $g, $b, $a\\};",
5543              []>;
5544def SUST_B_2D_V4B32_ZERO
5545  : NVPTXInst<(outs),
5546              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5547                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5548             "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
5549             "\\{$r, $g, $b, $a\\};",
5550              []>;
5551
5552
// sust.b.a2d.*.zero surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s            - Int64Regs operand printed first inside the brackets
//                   (presumably the surface handle -- confirm at the use site).
//   $idx, $x, $y  - Int32Regs coordinates, printed as {$idx, $x, $y, $y}.
//                   NOTE(review): $y is printed twice; this looks like padding
//                   to a four-element coordinate vector -- confirm against the
//                   PTX ISA description of sust.
//   $r/$g/$b/$a   - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                   use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                   only) uses Int64Regs.
5553def SUST_B_2D_ARRAY_B8_ZERO
5554  : NVPTXInst<(outs),
5555              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5556                   Int16Regs:$r),
5557              "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5558              []>;
5559def SUST_B_2D_ARRAY_B16_ZERO
5560  : NVPTXInst<(outs),
5561              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5562                   Int16Regs:$r),
5563              "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5564              []>;
5565def SUST_B_2D_ARRAY_B32_ZERO
5566  : NVPTXInst<(outs),
5567              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5568                   Int32Regs:$r),
5569              "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5570              []>;
5571def SUST_B_2D_ARRAY_B64_ZERO
5572  : NVPTXInst<(outs),
5573              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5574                   Int64Regs:$r),
5575              "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5576              []>;
5577def SUST_B_2D_ARRAY_V2B8_ZERO
5578  : NVPTXInst<(outs),
5579              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5580                   Int16Regs:$r, Int16Regs:$g),
5581              "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5582              "\\{$r, $g\\};",
5583              []>;
5584def SUST_B_2D_ARRAY_V2B16_ZERO
5585  : NVPTXInst<(outs),
5586              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5587                   Int16Regs:$r, Int16Regs:$g),
5588             "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5589             "\\{$r, $g\\};",
5590              []>;
5591def SUST_B_2D_ARRAY_V2B32_ZERO
5592  : NVPTXInst<(outs),
5593              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5594                   Int32Regs:$r, Int32Regs:$g),
5595             "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5596             "\\{$r, $g\\};",
5597              []>;
5598def SUST_B_2D_ARRAY_V2B64_ZERO
5599  : NVPTXInst<(outs),
5600              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5601                   Int64Regs:$r, Int64Regs:$g),
5602             "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5603             "\\{$r, $g\\};",
5604              []>;
5605def SUST_B_2D_ARRAY_V4B8_ZERO
5606  : NVPTXInst<(outs),
5607              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5608                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5609      "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5610      "\\{$r, $g, $b, $a\\};",
5611              []>;
5612def SUST_B_2D_ARRAY_V4B16_ZERO
5613  : NVPTXInst<(outs),
5614              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5615                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5616     "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5617     "\\{$r, $g, $b, $a\\};",
5618              []>;
5619def SUST_B_2D_ARRAY_V4B32_ZERO
5620  : NVPTXInst<(outs),
5621              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5622                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5623     "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
5624     "\\{$r, $g, $b, $a\\};",
5625              []>;
5626
5627
// sust.b.3d.*.zero surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x, $y, $z   - Int32Regs coordinates, printed as {$x, $y, $z, $z}.
//                  NOTE(review): $z is printed twice; this looks like padding
//                  to a four-element coordinate vector -- confirm against the
//                  PTX ISA description of sust.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs, b64 (scalar and .v2
//                  only) uses Int64Regs.
5628def SUST_B_3D_B8_ZERO
5629  : NVPTXInst<(outs),
5630              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5631                   Int16Regs:$r),
5632              "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5633              []>;
5634def SUST_B_3D_B16_ZERO
5635  : NVPTXInst<(outs),
5636              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5637                   Int16Regs:$r),
5638              "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5639              []>;
5640def SUST_B_3D_B32_ZERO
5641  : NVPTXInst<(outs),
5642              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5643                   Int32Regs:$r),
5644              "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5645              []>;
5646def SUST_B_3D_B64_ZERO
5647  : NVPTXInst<(outs),
5648              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5649                   Int64Regs:$r),
5650              "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5651              []>;
5652def SUST_B_3D_V2B8_ZERO
5653  : NVPTXInst<(outs),
5654              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5655                   Int16Regs:$r, Int16Regs:$g),
5656              "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5657              "\\{$r, $g\\};",
5658              []>;
5659def SUST_B_3D_V2B16_ZERO
5660  : NVPTXInst<(outs),
5661              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5662                   Int16Regs:$r, Int16Regs:$g),
5663              "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5664              "\\{$r, $g\\};",
5665              []>;
5666def SUST_B_3D_V2B32_ZERO
5667  : NVPTXInst<(outs),
5668              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5669                   Int32Regs:$r, Int32Regs:$g),
5670              "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5671              "\\{$r, $g\\};",
5672              []>;
5673def SUST_B_3D_V2B64_ZERO
5674  : NVPTXInst<(outs),
5675              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5676                   Int64Regs:$r, Int64Regs:$g),
5677              "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5678              "\\{$r, $g\\};",
5679              []>;
5680def SUST_B_3D_V4B8_ZERO
5681  : NVPTXInst<(outs),
5682              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5683                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5684         "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5685         "\\{$r, $g, $b, $a\\};",
5686              []>;
5687def SUST_B_3D_V4B16_ZERO
5688  : NVPTXInst<(outs),
5689              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5690                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5691        "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5692        "\\{$r, $g, $b, $a\\};",
5693              []>;
5694def SUST_B_3D_V4B32_ZERO
5695  : NVPTXInst<(outs),
5696              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5697                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5698        "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
5699        "\\{$r, $g, $b, $a\\};",
5700              []>;
5701
5702
5703
5704// Formatted surface stores (sust.p), .trap variant
5705
// sust.p.1d.*.trap surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x           - Int32Regs coordinate, printed as {$x}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs. This group defines no
//                  b64 records.
5706def SUST_P_1D_B8_TRAP
5707  : NVPTXInst<(outs),
5708              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5709              "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
5710              []>;
5711def SUST_P_1D_B16_TRAP
5712  : NVPTXInst<(outs),
5713              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5714              "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
5715              []>;
5716def SUST_P_1D_B32_TRAP
5717  : NVPTXInst<(outs),
5718              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5719              "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
5720              []>;
5721def SUST_P_1D_V2B8_TRAP
5722  : NVPTXInst<(outs),
5723              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5724              "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5725              []>;
5726def SUST_P_1D_V2B16_TRAP
5727  : NVPTXInst<(outs),
5728              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5729              "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5730              []>;
5731def SUST_P_1D_V2B32_TRAP
5732  : NVPTXInst<(outs),
5733              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5734              "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
5735              []>;
5736def SUST_P_1D_V4B8_TRAP
5737  : NVPTXInst<(outs),
5738              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5739                   Int16Regs:$b, Int16Regs:$a),
5740              "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5741              []>;
5742def SUST_P_1D_V4B16_TRAP
5743  : NVPTXInst<(outs),
5744              (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
5745                   Int16Regs:$b, Int16Regs:$a),
5746              "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5747              []>;
5748def SUST_P_1D_V4B32_TRAP
5749  : NVPTXInst<(outs),
5750              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
5751                   Int32Regs:$b, Int32Regs:$a),
5752              "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
5753              []>;
5754
5755
// sust.p.a1d.*.trap surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $idx, $x     - Int32Regs coordinates, printed as {$idx, $x}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs. This group defines no
//                  b64 records.
5756def SUST_P_1D_ARRAY_B8_TRAP
5757  : NVPTXInst<(outs),
5758              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5759              "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5760              []>;
5761def SUST_P_1D_ARRAY_B16_TRAP
5762  : NVPTXInst<(outs),
5763              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
5764              "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5765              []>;
5766def SUST_P_1D_ARRAY_B32_TRAP
5767  : NVPTXInst<(outs),
5768              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
5769              "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
5770              []>;
5771def SUST_P_1D_ARRAY_V2B8_TRAP
5772  : NVPTXInst<(outs),
5773              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5774                   Int16Regs:$g),
5775              "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5776              []>;
5777def SUST_P_1D_ARRAY_V2B16_TRAP
5778  : NVPTXInst<(outs),
5779              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5780                   Int16Regs:$g),
5781              "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5782              []>;
5783def SUST_P_1D_ARRAY_V2B32_TRAP
5784  : NVPTXInst<(outs),
5785              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5786                   Int32Regs:$g),
5787              "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
5788              []>;
5789def SUST_P_1D_ARRAY_V4B8_TRAP
5790  : NVPTXInst<(outs),
5791              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5792                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5793              "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
5794              "\\{$r, $g, $b, $a\\};",
5795              []>;
5796def SUST_P_1D_ARRAY_V4B16_TRAP
5797  : NVPTXInst<(outs),
5798              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
5799                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5800             "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
5801             "\\{$r, $g, $b, $a\\};",
5802              []>;
5803def SUST_P_1D_ARRAY_V4B32_TRAP
5804  : NVPTXInst<(outs),
5805              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
5806                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5807             "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
5808             "\\{$r, $g, $b, $a\\};",
5809              []>;
5810
5811
// sust.p.2d.*.trap surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s           - Int64Regs operand printed first inside the brackets
//                  (presumably the surface handle -- confirm at the use site).
//   $x, $y       - Int32Regs coordinates, printed as {$x, $y}.
//   $r/$g/$b/$a  - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                  use Int16Regs, b32 uses Int32Regs. This group defines no
//                  b64 records.
5812def SUST_P_2D_B8_TRAP
5813  : NVPTXInst<(outs),
5814              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5815              "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5816              []>;
5817def SUST_P_2D_B16_TRAP
5818  : NVPTXInst<(outs),
5819              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5820              "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5821              []>;
5822def SUST_P_2D_B32_TRAP
5823  : NVPTXInst<(outs),
5824              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5825              "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
5826              []>;
5827def SUST_P_2D_V2B8_TRAP
5828  : NVPTXInst<(outs),
5829              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5830                   Int16Regs:$g),
5831              "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5832              []>;
5833def SUST_P_2D_V2B16_TRAP
5834  : NVPTXInst<(outs),
5835              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5836                   Int16Regs:$g),
5837              "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5838              []>;
5839def SUST_P_2D_V2B32_TRAP
5840  : NVPTXInst<(outs),
5841              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5842                   Int32Regs:$g),
5843              "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
5844              []>;
5845def SUST_P_2D_V4B8_TRAP
5846  : NVPTXInst<(outs),
5847              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5848                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5849              "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
5850              "\\{$r, $g, $b, $a\\};",
5851              []>;
5852def SUST_P_2D_V4B16_TRAP
5853  : NVPTXInst<(outs),
5854              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
5855                   Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5856             "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
5857             "\\{$r, $g, $b, $a\\};",
5858              []>;
5859def SUST_P_2D_V4B32_TRAP
5860  : NVPTXInst<(outs),
5861              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5862                   Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5863             "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
5864             "\\{$r, $g, $b, $a\\};",
5865              []>;
5866
5867
// sust.p.a2d.*.trap surface-store records. Every record has an empty pattern
// list ([]), so no selection pattern is attached here.
//   $s            - Int64Regs operand printed first inside the brackets
//                   (presumably the surface handle -- confirm at the use site).
//   $idx, $x, $y  - Int32Regs coordinates, printed as {$idx, $x, $y, $y}.
//                   NOTE(review): $y is printed twice; this looks like padding
//                   to a four-element coordinate vector -- confirm against the
//                   PTX ISA description of sust.
//   $r/$g/$b/$a   - data operands of the scalar, .v2 and .v4 forms; b8 and b16
//                   use Int16Regs, b32 uses Int32Regs. This group defines no
//                   b64 records.
5868def SUST_P_2D_ARRAY_B8_TRAP
5869  : NVPTXInst<(outs),
5870              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5871                   Int16Regs:$r),
5872              "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5873              []>;
5874def SUST_P_2D_ARRAY_B16_TRAP
5875  : NVPTXInst<(outs),
5876              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5877                   Int16Regs:$r),
5878              "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5879              []>;
5880def SUST_P_2D_ARRAY_B32_TRAP
5881  : NVPTXInst<(outs),
5882              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5883                   Int32Regs:$r),
5884              "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
5885              []>;
5886def SUST_P_2D_ARRAY_V2B8_TRAP
5887  : NVPTXInst<(outs),
5888              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5889                   Int16Regs:$r, Int16Regs:$g),
5890              "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5891              "\\{$r, $g\\};",
5892              []>;
5893def SUST_P_2D_ARRAY_V2B16_TRAP
5894  : NVPTXInst<(outs),
5895              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5896                   Int16Regs:$r, Int16Regs:$g),
5897             "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5898             "\\{$r, $g\\};",
5899              []>;
5900def SUST_P_2D_ARRAY_V2B32_TRAP
5901  : NVPTXInst<(outs),
5902              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5903                   Int32Regs:$r, Int32Regs:$g),
5904             "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5905             "\\{$r, $g\\};",
5906              []>;
5907def SUST_P_2D_ARRAY_V4B8_TRAP
5908  : NVPTXInst<(outs),
5909              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5910                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5911      "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5912      "\\{$r, $g, $b, $a\\};",
5913              []>;
5914def SUST_P_2D_ARRAY_V4B16_TRAP
5915  : NVPTXInst<(outs),
5916              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5917                   Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5918     "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5919     "\\{$r, $g, $b, $a\\};",
5920              []>;
5921def SUST_P_2D_ARRAY_V4B32_TRAP
5922  : NVPTXInst<(outs),
5923              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
5924                   Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5925     "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
5926     "\\{$r, $g, $b, $a\\};",
5927              []>;
5928
5929
5930def SUST_P_3D_B8_TRAP
5931  : NVPTXInst<(outs),
5932              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5933                   Int16Regs:$r),
5934              "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5935              []>;
5936def SUST_P_3D_B16_TRAP
5937  : NVPTXInst<(outs),
5938              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5939                   Int16Regs:$r),
5940              "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5941              []>;
5942def SUST_P_3D_B32_TRAP
5943  : NVPTXInst<(outs),
5944              (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5945                   Int32Regs:$r),
5946              "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
5947              []>;
// Instruction record printed as `sust.p.3d.v2.b8.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g (Int16Regs) form the two-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V2B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b8.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
// Instruction record printed as `sust.p.3d.v2.b16.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g (Int16Regs) form the two-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V2B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g),
    "sust.p.3d.v2.b16.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
// Instruction record printed as `sust.p.3d.v2.b32.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g (Int32Regs) form the two-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V2B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g),
    "sust.p.3d.v2.b32.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
    []>;
// Instruction record printed as `sust.p.3d.v4.b8.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g, $b, $a (Int16Regs) form the four-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V4B8_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b8.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
// Instruction record printed as `sust.p.3d.v4.b16.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g, $b, $a (Int16Regs) form the four-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V4B16_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.p.3d.v4.b16.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
// Instruction record printed as `sust.p.3d.v4.b32.trap`.
//   outs: none.
//   ins : $s (Int64Regs) is printed first inside the bracketed operand;
//         $x, $y, $z (Int32Regs) fill the coordinate vector, with $z printed
//         twice so that the vector has four entries;
//         $r, $g, $b, $a (Int32Regs) form the four-element data vector.
// The pattern list is empty, so no selection pattern is attached to this def.
def SUST_P_3D_V4B32_TRAP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s,
         Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    "sust.p.3d.v4.b32.trap \t"
    "[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
    []>;
5990}
5991
// Surface store instruction patterns.
// These are written as standalone Pat<> records instead of inline patterns on
// the instruction definitions: attaching them inline makes TableGen report type
// errors, and the cause has not been tracked down.
5995
5996// .clamp variant
// Select each single-element int_nvvm_sust_b_1d_{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_1D_B{8,16,32,64}_CLAMP. Operands ($s, $x, data) are
// forwarded unchanged and in the same order; the i8 and i16 forms both take
// their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6012
// Select each two-element int_nvvm_sust_b_1d_v2{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_1D_V2B{8,16,32,64}_CLAMP. Operands ($s, $x, then $r, $g)
// are forwarded unchanged and in the same order; the v2i8 and v2i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
6032
// Select each four-element int_nvvm_sust_b_1d_v4{i8,i16,i32}_clamp intrinsic
// to SUST_B_1D_V4B{8,16,32}_CLAMP. Operands ($s, $x, then $r, $g, $b, $a) are
// forwarded unchanged and in the same order; the v4i8 and v4i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6050
6051
6052
// Select each single-element int_nvvm_sust_b_1d_array_{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_1D_ARRAY_B{8,16,32,64}_CLAMP. Operands ($s, $l, $x,
// data) are forwarded unchanged and in the same order; the i8 and i16 forms
// both take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r)>;
6072
// Select each two-element int_nvvm_sust_b_1d_array_v2{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_1D_ARRAY_V2B{8,16,32,64}_CLAMP. Operands ($s, $l, $x,
// then $r, $g) are forwarded unchanged and in the same order; the v2i8 and
// v2i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
6092
// Select each four-element int_nvvm_sust_b_1d_array_v4{i8,i16,i32}_clamp
// intrinsic to SUST_B_1D_ARRAY_V4B{8,16,32}_CLAMP. Operands ($s, $l, $x, then
// $r, $g, $b, $a) are forwarded unchanged and in the same order; the v4i8 and
// v4i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6110
6111
6112
// Select each single-element int_nvvm_sust_b_2d_{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_2D_B{8,16,32,64}_CLAMP. Operands ($s, $x, $y, data) are
// forwarded unchanged and in the same order; the i8 and i16 forms both take
// their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r)>;
6132
// Select each two-element int_nvvm_sust_b_2d_v2{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_2D_V2B{8,16,32,64}_CLAMP. Operands ($s, $x, $y, then
// $r, $g) are forwarded unchanged and in the same order; the v2i8 and v2i16
// forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
6152
// Select each four-element int_nvvm_sust_b_2d_v4{i8,i16,i32}_clamp intrinsic
// to SUST_B_2D_V4B{8,16,32}_CLAMP. Operands ($s, $x, $y, then $r, $g, $b, $a)
// are forwarded unchanged and in the same order; the v4i8 and v4i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6170
6171
6172
// Select each single-element int_nvvm_sust_b_2d_array_{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_2D_ARRAY_B{8,16,32,64}_CLAMP. Operands ($s, $l, $x, $y,
// data) are forwarded unchanged and in the same order; the i8 and i16 forms
// both take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r)>;
6196
// Select each two-element int_nvvm_sust_b_2d_array_v2{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_2D_ARRAY_V2B{8,16,32,64}_CLAMP. Operands ($s, $l, $x,
// $y, then $r, $g) are forwarded unchanged and in the same order; the v2i8 and
// v2i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
6222
// Select each four-element int_nvvm_sust_b_2d_array_v4{i8,i16,i32}_clamp
// intrinsic to SUST_B_2D_ARRAY_V4B{8,16,32}_CLAMP. Operands ($s, $l, $x, $y,
// then $r, $g, $b, $a) are forwarded unchanged and in the same order; the v4i8
// and v4i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6243
6244
6245
// Select each single-element int_nvvm_sust_b_3d_{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_3D_B{8,16,32,64}_CLAMP. Operands ($s, $x, $y, $z, data)
// are forwarded unchanged and in the same order; the i8 and i16 forms both
// take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_3d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r)>;
6273
// Select each two-element int_nvvm_sust_b_3d_v2{i8,i16,i32,i64}_clamp
// intrinsic to SUST_B_3D_V2B{8,16,32,64}_CLAMP. Operands ($s, $x, $y, $z, then
// $r, $g) are forwarded unchanged and in the same order; the v2i8 and v2i16
// forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g)>;
6301
// Select each four-element int_nvvm_sust_b_3d_v4{i8,i16,i32}_clamp intrinsic
// to SUST_B_3D_V4B{8,16,32}_CLAMP. Operands ($s, $x, $y, $z, then $r, $g, $b,
// $a) are forwarded unchanged and in the same order; the v4i8 and v4i16 forms
// both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6322
6323
6324// .trap variant
// Select each single-element int_nvvm_sust_b_1d_{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_1D_B{8,16,32,64}_TRAP. Operands ($s, $x, data) are
// forwarded unchanged and in the same order; the i8 and i16 forms both take
// their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_trap Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_trap Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_trap Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6340
// Select each two-element int_nvvm_sust_b_1d_v2{i8,i16,i32,i64}_trap intrinsic
// to SUST_B_1D_V2B{8,16,32,64}_TRAP. Operands ($s, $x, then $r, $g) are
// forwarded unchanged and in the same order; the v2i8 and v2i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
6360
// Select each four-element int_nvvm_sust_b_1d_v4{i8,i16,i32}_trap intrinsic
// to SUST_B_1D_V4B{8,16,32}_TRAP. Operands ($s, $x, then $r, $g, $b, $a) are
// forwarded unchanged and in the same order; the v4i8 and v4i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6378
6379
6380
// Select each single-element int_nvvm_sust_b_1d_array_{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_1D_ARRAY_B{8,16,32,64}_TRAP. Operands ($s, $l, $x, data)
// are forwarded unchanged and in the same order; the i8 and i16 forms both
// take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r)>;
6400
// Select each two-element int_nvvm_sust_b_1d_array_v2{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_1D_ARRAY_V2B{8,16,32,64}_TRAP. Operands ($s, $l, $x,
// then $r, $g) are forwarded unchanged and in the same order; the v2i8 and
// v2i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;
6420
// Select each four-element int_nvvm_sust_b_1d_array_v4{i8,i16,i32}_trap
// intrinsic to SUST_B_1D_ARRAY_V4B{8,16,32}_TRAP. Operands ($s, $l, $x, then
// $r, $g, $b, $a) are forwarded unchanged and in the same order; the v4i8 and
// v4i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6438
6439
6440
// Select each single-element int_nvvm_sust_b_2d_{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_2D_B{8,16,32,64}_TRAP. Operands ($s, $x, $y, data) are
// forwarded unchanged and in the same order; the i8 and i16 forms both take
// their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r),
  (SUST_B_2D_B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r),
  (SUST_B_2D_B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r)>;
6460
// Select each two-element int_nvvm_sust_b_2d_v2{i8,i16,i32,i64}_trap intrinsic
// to SUST_B_2D_V2B{8,16,32,64}_TRAP. Operands ($s, $x, $y, then $r, $g) are
// forwarded unchanged and in the same order; the v2i8 and v2i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
6480
// Select each four-element int_nvvm_sust_b_2d_v4{i8,i16,i32}_trap intrinsic
// to SUST_B_2D_V4B{8,16,32}_TRAP. Operands ($s, $x, $y, then $r, $g, $b, $a)
// are forwarded unchanged and in the same order; the v4i8 and v4i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6498
6499
6500
// Select each single-element int_nvvm_sust_b_2d_array_{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_2D_ARRAY_B{8,16,32,64}_TRAP. Operands ($s, $l, $x, $y,
// data) are forwarded unchanged and in the same order; the i8 and i16 forms
// both take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r)>;
6524
// Select each two-element int_nvvm_sust_b_2d_array_v2{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_2D_ARRAY_V2B{8,16,32,64}_TRAP. Operands ($s, $l, $x, $y,
// then $r, $g) are forwarded unchanged and in the same order; the v2i8 and
// v2i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;
6550
// Select each four-element int_nvvm_sust_b_2d_array_v4{i8,i16,i32}_trap
// intrinsic to SUST_B_2D_ARRAY_V4B{8,16,32}_TRAP. Operands ($s, $l, $x, $y,
// then $r, $g, $b, $a) are forwarded unchanged and in the same order; the v4i8
// and v4i16 forms both take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_trap
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_TRAP
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6571
6572
6573
// Select each single-element int_nvvm_sust_b_3d_{i8,i16,i32,i64}_trap
// intrinsic to SUST_B_3D_B{8,16,32,64}_TRAP. Operands ($s, $x, $y, $z, data)
// are forwarded unchanged and in the same order; the i8 and i16 forms both
// take their data operand in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_3d_i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r),
  (SUST_B_3D_B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r),
  (SUST_B_3D_B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r),
  (SUST_B_3D_B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r)>;
6601
// Select each two-element int_nvvm_sust_b_3d_v2{i8,i16,i32,i64}_trap intrinsic
// to SUST_B_3D_V2B{8,16,32,64}_TRAP. Operands ($s, $x, $y, $z, then $r, $g)
// are forwarded unchanged and in the same order; the v2i8 and v2i16 forms both
// take their data operands in Int16Regs.
def : Pat<
  (int_nvvm_sust_b_3d_v2i8_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_trap
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_TRAP
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
     Int64Regs:$r, Int64Regs:$g)>;
6629
6630def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
6631           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6632           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6633          (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
6634           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6635           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6636
6637def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
6638           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6639           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6640          (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
6641           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6642           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6643
6644def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
6645           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6646           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6647          (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
6648           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6649           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6650
6651
6652// .zero variant
// sust.b 1D, _zero forms.
// Each anonymous Pat selects the SUST_B_1D_*_ZERO instruction for the
// int_nvvm_sust_b_1d_*_zero intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x (Int32Regs), then the
// data operands. 8-bit data is carried in Int16Regs, like 16-bit data.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the SUST_B_1D_* instruction definitions.
6653def : Pat<(int_nvvm_sust_b_1d_i8_zero
6654           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6655          (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6656
6657def : Pat<(int_nvvm_sust_b_1d_i16_zero
6658           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6659          (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6660
6661def : Pat<(int_nvvm_sust_b_1d_i32_zero
6662           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6663          (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6664
6665def : Pat<(int_nvvm_sust_b_1d_i64_zero
6666           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
6667          (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
6668
// Two-element forms: data operands are $r and $g.
6669def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
6670           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6671          (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
6672           Int16Regs:$r, Int16Regs:$g)>;
6673
6674def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
6675           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6676          (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
6677           Int16Regs:$r, Int16Regs:$g)>;
6678
6679def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
6680           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6681          (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
6682           Int32Regs:$r, Int32Regs:$g)>;
6683
6684def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
6685           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6686          (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
6687           Int64Regs:$r, Int64Regs:$g)>;
6688
// Four-element forms: data operands are $r, $g, $b and $a. There is no
// 64-bit four-element form in this group.
6689def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
6690           Int64Regs:$s, Int32Regs:$x,
6691           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6692          (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
6693           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6694
6695def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
6696           Int64Regs:$s, Int32Regs:$x,
6697           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6698          (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
6699           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6700
6701def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
6702           Int64Regs:$s, Int32Regs:$x,
6703           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6704          (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
6705           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6706
6707
6708
// sust.b 1D array, _zero forms.
// Each anonymous Pat selects the SUST_B_1D_ARRAY_*_ZERO instruction for the
// int_nvvm_sust_b_1d_array_*_zero intrinsic of the same element type.
// Operands are forwarded unchanged and in order: $s (Int64Regs), $l and $x
// (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x the coordinate
// -- confirm against the SUST_B_1D_ARRAY_* instruction definitions.
6709def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
6710           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6711          (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6712           Int16Regs:$r)>;
6713
6714def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
6715           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
6716          (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6717           Int16Regs:$r)>;
6718
6719def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
6720           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
6721          (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6722           Int32Regs:$r)>;
6723
6724def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
6725           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
6726          (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6727           Int64Regs:$r)>;
6728
// Two-element forms: data operands are $r and $g.
6729def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
6730          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6731          (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6732           Int16Regs:$r, Int16Regs:$g)>;
6733
6734def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
6735          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6736          (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6737           Int16Regs:$r, Int16Regs:$g)>;
6738
6739def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
6740          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6741          (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6742           Int32Regs:$r, Int32Regs:$g)>;
6743
6744def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
6745          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
6746          (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6747           Int64Regs:$r, Int64Regs:$g)>;
6748
// Four-element forms: data operands are $r, $g, $b and $a. There is no
// 64-bit four-element form in this group.
6749def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
6750           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6751           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6752          (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6753           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6754
6755def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
6756           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6757           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6758          (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6759           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6760
6761def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
6762           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6763           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6764          (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
6765           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6766
6767
6768
// sust.b 2D, _zero forms.
// Each anonymous Pat selects the SUST_B_2D_*_ZERO instruction for the
// int_nvvm_sust_b_2d_*_zero intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x and $y (Int32Regs),
// then the data operands. 8-bit data is carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_B_2D_* instruction definitions.
6769def : Pat<(int_nvvm_sust_b_2d_i8_zero
6770           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6771          (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6772           Int16Regs:$r)>;
6773
6774def : Pat<(int_nvvm_sust_b_2d_i16_zero
6775           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6776          (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6777           Int16Regs:$r)>;
6778
6779def : Pat<(int_nvvm_sust_b_2d_i32_zero
6780           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6781          (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6782           Int32Regs:$r)>;
6783
6784def : Pat<(int_nvvm_sust_b_2d_i64_zero
6785           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6786          (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6787           Int64Regs:$r)>;
6788
// Two-element forms: data operands are $r and $g.
6789def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
6790          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6791          (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6792           Int16Regs:$r, Int16Regs:$g)>;
6793
6794def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
6795          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
6796          (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6797           Int16Regs:$r, Int16Regs:$g)>;
6798
6799def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
6800          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
6801          (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6802           Int32Regs:$r, Int32Regs:$g)>;
6803
6804def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
6805          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
6806          (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6807           Int64Regs:$r, Int64Regs:$g)>;
6808
// Four-element forms: data operands are $r, $g, $b and $a. There is no
// 64-bit four-element form in this group.
6809def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
6810           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6811           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6812          (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6813           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6814
6815def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
6816           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6817           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6818          (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6819           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6820
6821def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
6822           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6823           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6824          (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
6825           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6826
6827
6828
// sust.b 2D array, _zero forms.
// Each anonymous Pat selects the SUST_B_2D_ARRAY_*_ZERO instruction for the
// int_nvvm_sust_b_2d_array_*_zero intrinsic of the same element type.
// Operands are forwarded unchanged and in order: $s (Int64Regs), $l, $x and
// $y (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates -- confirm against the SUST_B_2D_ARRAY_* instruction definitions.
6829def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
6830          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6831          (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
6832           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6833           Int16Regs:$r)>;
6834
6835def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
6836          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
6837          (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
6838           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6839           Int16Regs:$r)>;
6840
6841def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
6842          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
6843          (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
6844           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6845           Int32Regs:$r)>;
6846
6847def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
6848          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
6849          (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
6850           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6851           Int64Regs:$r)>;
6852
// Two-element forms: data operands are $r and $g.
6853def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
6854           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6855           Int16Regs:$r, Int16Regs:$g),
6856          (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
6857           Int32Regs:$x, Int32Regs:$y,
6858           Int16Regs:$r, Int16Regs:$g)>;
6859
6860def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
6861           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6862           Int16Regs:$r, Int16Regs:$g),
6863          (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
6864           Int32Regs:$x, Int32Regs:$y,
6865           Int16Regs:$r, Int16Regs:$g)>;
6866
6867def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
6868           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
6869           Int32Regs:$g),
6870          (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
6871           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
6872
6873def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
6874           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
6875           Int64Regs:$g),
6876          (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
6877           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
6878
// Four-element forms: data operands are $r, $g, $b and $a. There is no
// 64-bit four-element form in this group.
6879def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
6880           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6881           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6882          (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
6883           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6884           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6885
6886def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
6887           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6888           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6889          (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
6890           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6891           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6892
6893def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
6894           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
6895           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6896          (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
6897           Int32Regs:$x, Int32Regs:$y,
6898           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6899
6900
6901
// sust.b 3D, _zero forms.
// Each anonymous Pat selects the SUST_B_3D_*_ZERO instruction for the
// int_nvvm_sust_b_3d_*_zero intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x, $y and $z
// (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_B_3D_* instruction definitions.
6902def : Pat<(int_nvvm_sust_b_3d_i8_zero
6903           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6904           Int16Regs:$r),
6905          (SUST_B_3D_B8_ZERO Int64Regs:$s,
6906           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6907           Int16Regs:$r)>;
6908
6909def : Pat<(int_nvvm_sust_b_3d_i16_zero
6910           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6911           Int16Regs:$r),
6912          (SUST_B_3D_B16_ZERO Int64Regs:$s,
6913           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6914           Int16Regs:$r)>;
6915
6916def : Pat<(int_nvvm_sust_b_3d_i32_zero
6917           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6918           Int32Regs:$r),
6919          (SUST_B_3D_B32_ZERO Int64Regs:$s,
6920           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6921           Int32Regs:$r)>;
6922
6923def : Pat<(int_nvvm_sust_b_3d_i64_zero
6924           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6925           Int64Regs:$r),
6926          (SUST_B_3D_B64_ZERO Int64Regs:$s,
6927           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6928           Int64Regs:$r)>;
6929
// Two-element forms: data operands are $r and $g.
6930def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
6931           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6932           Int16Regs:$r, Int16Regs:$g),
6933          (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
6934           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6935           Int16Regs:$r, Int16Regs:$g)>;
6936
6937def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
6938           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6939           Int16Regs:$r, Int16Regs:$g),
6940          (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
6941           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6942           Int16Regs:$r, Int16Regs:$g)>;
6943
6944def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
6945           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6946           Int32Regs:$r, Int32Regs:$g),
6947          (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
6948           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6949           Int32Regs:$r, Int32Regs:$g)>;
6950
6951def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
6952           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6953           Int64Regs:$r, Int64Regs:$g),
6954          (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
6955           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6956           Int64Regs:$r, Int64Regs:$g)>;
6957
// Four-element forms: data operands are $r, $g, $b and $a. There is no
// 64-bit four-element form in this group.
6958def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
6959           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6960           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6961          (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
6962           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6963           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6964
6965def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
6966           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6967           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6968          (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
6969           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6970           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6971
6972def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
6973           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6974           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6975          (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
6976           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6977           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6978
6979
6980
6981
// sust.p 1D, _trap forms.
// Each anonymous Pat selects the SUST_P_1D_*_TRAP instruction for the
// int_nvvm_sust_p_1d_*_trap intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x (Int32Regs), then the
// data operands. 8-bit data is carried in Int16Regs. Unlike the sust_b
// groups, this group has no 64-bit data forms.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the SUST_P_1D_* instruction definitions.
6982def : Pat<(int_nvvm_sust_p_1d_i8_trap
6983           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6984          (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6985
6986def : Pat<(int_nvvm_sust_p_1d_i16_trap
6987           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6988          (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
6989
6990def : Pat<(int_nvvm_sust_p_1d_i32_trap
6991           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6992          (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
6993
// Two-element forms: data operands are $r and $g.
6994def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
6995           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6996          (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
6997           Int16Regs:$r, Int16Regs:$g)>;
6998
6999def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
7000           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7001          (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
7002           Int16Regs:$r, Int16Regs:$g)>;
7003
7004def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
7005           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7006          (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
7007           Int32Regs:$r, Int32Regs:$g)>;
7008
// Four-element forms: data operands are $r, $g, $b and $a.
7009def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
7010           Int64Regs:$s, Int32Regs:$x,
7011           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7012          (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
7013           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7014
7015def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
7016           Int64Regs:$s, Int32Regs:$x,
7017           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7018          (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
7019           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7020
7021def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
7022           Int64Regs:$s, Int32Regs:$x,
7023           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7024          (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
7025           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7026
7027
7028
// sust.p 1D array, _trap forms.
// Each anonymous Pat selects the SUST_P_1D_ARRAY_*_TRAP instruction for the
// int_nvvm_sust_p_1d_array_*_trap intrinsic of the same element type.
// Operands are forwarded unchanged and in order: $s (Int64Regs), $l and $x
// (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// This group has no 64-bit data forms.
// NOTE(review): $l is presumably the array layer index and $x the coordinate
// -- confirm against the SUST_P_1D_ARRAY_* instruction definitions.
7029def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
7030           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7031          (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7032           Int16Regs:$r)>;
7033
7034def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
7035           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
7036          (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7037           Int16Regs:$r)>;
7038
7039def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
7040           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
7041          (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7042           Int32Regs:$r)>;
7043
// Two-element forms: data operands are $r and $g.
7044def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
7045          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7046          (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7047           Int16Regs:$r, Int16Regs:$g)>;
7048
7049def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
7050          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
7051          (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7052           Int16Regs:$r, Int16Regs:$g)>;
7053
7054def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
7055          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
7056          (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7057           Int32Regs:$r, Int32Regs:$g)>;
7058
// Four-element forms: data operands are $r, $g, $b and $a.
7059def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
7060           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7061           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7062          (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7063           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7064
7065def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
7066           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7067           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7068          (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7069           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7070
7071def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
7072           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7073           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7074          (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
7075           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7076
7077
7078
// sust.p 2D, _trap forms.
// Each anonymous Pat selects the SUST_P_2D_*_TRAP instruction for the
// int_nvvm_sust_p_2d_*_trap intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x and $y (Int32Regs),
// then the data operands. 8-bit data is carried in Int16Regs. This group has
// no 64-bit data forms.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_P_2D_* instruction definitions.
7079def : Pat<(int_nvvm_sust_p_2d_i8_trap
7080           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7081          (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7082           Int16Regs:$r)>;
7083
7084def : Pat<(int_nvvm_sust_p_2d_i16_trap
7085           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7086          (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7087           Int16Regs:$r)>;
7088
7089def : Pat<(int_nvvm_sust_p_2d_i32_trap
7090           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7091          (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7092           Int32Regs:$r)>;
7093
// Two-element forms: data operands are $r and $g.
7094def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
7095          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7096          (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7097           Int16Regs:$r, Int16Regs:$g)>;
7098
7099def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
7100          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
7101          (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7102           Int16Regs:$r, Int16Regs:$g)>;
7103
7104def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
7105          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
7106          (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7107           Int32Regs:$r, Int32Regs:$g)>;
7108
// Four-element forms: data operands are $r, $g, $b and $a.
7109def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
7110           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7111           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7112          (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7113           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7114
7115def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
7116           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7117           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7118          (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7119           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7120
7121def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
7122           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7123           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7124          (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
7125           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7126
7127
7128
// sust.p 2D array, _trap forms.
// Each anonymous Pat selects the SUST_P_2D_ARRAY_*_TRAP instruction for the
// int_nvvm_sust_p_2d_array_*_trap intrinsic of the same element type.
// Operands are forwarded unchanged and in order: $s (Int64Regs), $l, $x and
// $y (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// This group has no 64-bit data forms.
// NOTE(review): $l is presumably the array layer index and $x/$y the
// coordinates -- confirm against the SUST_P_2D_ARRAY_* instruction definitions.
7129def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
7130          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7131          (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
7132           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7133           Int16Regs:$r)>;
7134
7135def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
7136          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
7137          (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
7138           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7139           Int16Regs:$r)>;
7140
7141def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
7142          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
7143          (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
7144           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7145           Int32Regs:$r)>;
7146
// Two-element forms: data operands are $r and $g.
7147def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
7148           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7149           Int16Regs:$r, Int16Regs:$g),
7150          (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
7151           Int32Regs:$x, Int32Regs:$y,
7152           Int16Regs:$r, Int16Regs:$g)>;
7153
7154def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
7155           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7156           Int16Regs:$r, Int16Regs:$g),
7157          (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
7158           Int32Regs:$x, Int32Regs:$y,
7159           Int16Regs:$r, Int16Regs:$g)>;
7160
7161def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
7162           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
7163           Int32Regs:$g),
7164          (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
7165           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
7166
// Four-element forms: data operands are $r, $g, $b and $a.
7167def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
7168           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7169           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7170          (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
7171           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7172           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7173
7174def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
7175           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7176           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7177          (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
7178           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7179           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7180
7181def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
7182           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
7183           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7184          (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
7185           Int32Regs:$x, Int32Regs:$y,
7186           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7187
7188
7189
// sust.p 3D, _trap forms.
// Each anonymous Pat selects the SUST_P_3D_*_TRAP instruction for the
// int_nvvm_sust_p_3d_*_trap intrinsic of the same element type. Operands are
// forwarded unchanged and in order: $s (Int64Regs), $x, $y and $z
// (Int32Regs), then the data operands. 8-bit data is carried in Int16Regs.
// This group has no 64-bit data forms.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_P_3D_* instruction definitions.
7190def : Pat<(int_nvvm_sust_p_3d_i8_trap
7191           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7192           Int16Regs:$r),
7193          (SUST_P_3D_B8_TRAP Int64Regs:$s,
7194           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7195           Int16Regs:$r)>;
7196
7197def : Pat<(int_nvvm_sust_p_3d_i16_trap
7198           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7199           Int16Regs:$r),
7200          (SUST_P_3D_B16_TRAP Int64Regs:$s,
7201           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7202           Int16Regs:$r)>;
7203
7204def : Pat<(int_nvvm_sust_p_3d_i32_trap
7205           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7206           Int32Regs:$r),
7207          (SUST_P_3D_B32_TRAP Int64Regs:$s,
7208           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7209           Int32Regs:$r)>;
7210
// Two-element forms: data operands are $r and $g.
7211def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
7212           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7213           Int16Regs:$r, Int16Regs:$g),
7214          (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
7215           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7216           Int16Regs:$r, Int16Regs:$g)>;
7217
7218def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
7219           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7220           Int16Regs:$r, Int16Regs:$g),
7221          (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
7222           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7223           Int16Regs:$r, Int16Regs:$g)>;
7224
7225def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
7226           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7227           Int32Regs:$r, Int32Regs:$g),
7228          (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
7229           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7230           Int32Regs:$r, Int32Regs:$g)>;
7231
// Four-element forms: data operands are $r, $g, $b and $a.
7232def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
7233           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7234           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7235          (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
7236           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7237           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7238
7239def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
7240           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7241           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
7242          (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
7243           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7244           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
7245
7246def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
7247           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7248           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
7249          (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
7250           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
7251           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
7252
7253//-----------------------------------
7254// Read Special Registers
7255//-----------------------------------
7256
// Instruction template for reading a special register into a 64-bit result.
//   regname - register name without the leading '%'; it is appended to
//             "mov.u64 \t$d, %" to build the assembly string.
//   intop   - intrinsic the instruction is selected for; it takes no operands
//             and its result is placed in $d.
// The instruction has one output ($d, Int64Regs) and no inputs.
7257class PTX_READ_SREG_R64<string regname, Intrinsic intop>
7258  : NVPTXInst<(outs Int64Regs:$d), (ins),
7259              !strconcat("mov.u64 \t$d, %", regname, ";"),
7260              [(set Int64Regs:$d, (intop))]>;
7261
// Instruction template for reading a special register into a 32-bit result.
//   regname - register name without the leading '%'; it is appended to
//             "mov.u32 \t$d, %" to build the assembly string.
//   intop   - intrinsic the instruction is selected for; it takes no operands
//             and its result is placed in $d.
// The instruction has one output ($d, Int32Regs) and no inputs.
7262class PTX_READ_SREG_R32<string regname, Intrinsic intop>
7263  : NVPTXInst<(outs Int32Regs:$d), (ins),
7264              !strconcat("mov.u32 \t$d, %", regname, ";"),
7265              [(set Int32Regs:$d, (intop))]>;
7266
7267// TODO: Add vector versions of the special-register reads.
7268
// 32-bit reads of %tid.{x,y,z,w}, one instruction per component, each
// selected for the matching int_nvvm_read_ptx_sreg_tid_* intrinsic.
7269def INT_PTX_SREG_TID_X :
7270    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
7271def INT_PTX_SREG_TID_Y :
7272    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
7273def INT_PTX_SREG_TID_Z :
7274    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
7275def INT_PTX_SREG_TID_W :
7276    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
7277
// 32-bit reads of %ntid.{x,y,z,w}, one instruction per component, each
// selected for the matching int_nvvm_read_ptx_sreg_ntid_* intrinsic.
7278def INT_PTX_SREG_NTID_X :
7279    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
7280def INT_PTX_SREG_NTID_Y :
7281    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
7282def INT_PTX_SREG_NTID_Z :
7283    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
7284def INT_PTX_SREG_NTID_W :
7285    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
7286
// 32-bit reads of %laneid, %warpid and %nwarpid, each selected for the
// matching int_nvvm_read_ptx_sreg_* intrinsic.
7287def INT_PTX_SREG_LANEID :
7288    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
7289def INT_PTX_SREG_WARPID :
7290    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
7291def INT_PTX_SREG_NWARPID :
7292    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
7293
// 32-bit reads of %ctaid.{x,y,z,w}, one instruction per component, each
// selected for the matching int_nvvm_read_ptx_sreg_ctaid_* intrinsic.
7294def INT_PTX_SREG_CTAID_X :
7295    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
7296def INT_PTX_SREG_CTAID_Y :
7297    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
7298def INT_PTX_SREG_CTAID_Z :
7299    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
7300def INT_PTX_SREG_CTAID_W :
7301    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
7302
7303def INT_PTX_SREG_NCTAID_X :
7304    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
7305def INT_PTX_SREG_NCTAID_Y :
7306    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
7307def INT_PTX_SREG_NCTAID_Z :
7308    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
7309def INT_PTX_SREG_NCTAID_W :
7310    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
7311
7312def INT_PTX_SREG_SMID :
7313    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
7314def INT_PTX_SREG_NSMID :
7315    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
7316def INT_PTX_SREG_GRIDID :
7317    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
7318
7319def INT_PTX_SREG_LANEMASK_EQ :
7320    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
7321def INT_PTX_SREG_LANEMASK_LE :
7322    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
7323def INT_PTX_SREG_LANEMASK_LT :
7324    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
7325def INT_PTX_SREG_LANEMASK_GE :
7326    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
7327def INT_PTX_SREG_LANEMASK_GT :
7328    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
7329
7330def INT_PTX_SREG_CLOCK :
7331    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
7332def INT_PTX_SREG_CLOCK64 :
7333    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
7334
7335def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
7336def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
7337def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
7338def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
7339
// Matches int_nvvm_read_ptx_sreg_warpsize and moves the constant WARP_SZ into
// a 32-bit register. The asm string is spelled out by hand because the source
// operand is written without a '%' prefix.
7340// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that template
7341// always puts '%' in front of the name, so it cannot emit the bare constant.
7342def INT_PTX_SREG_WARPSIZE :
7343    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
7344              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
7345
7346// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
7347// In addition to target-independent fields provided by WMMA_REGS, it adds
7348// the fields commonly used to implement specific PTX instruction -- register
7349// types and names, constraints, parts of assembly, etc.
//
// The parameter r is an existing WMMA_REGS record; its geom, frag and
// ptx_elt_type are forwarded to the WMMA_REGS base. The names geom, frag,
// ptx_elt_type and regs used below are not declared in this class --
// presumably they are fields of WMMA_REGS, which is defined outside this
// file (confirm there).
7350class WMMA_REGINFO<WMMA_REGS r>
7351      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
7352  // NVPTX register types used to carry fragment data.
  // Everything except f16 and f32 is carried in Int32Regs. The !cond has no
  // default arm, so an element type that is not listed gets no value.
7353  NVPTXRegClass regclass = !cond(
7354    !eq(ptx_elt_type, "f16") : Float16x2Regs,
7355    !eq(ptx_elt_type, "f32") : Float32Regs,
7356    !eq(ptx_elt_type, "s32") : Int32Regs,
7357    !eq(ptx_elt_type, "s8") : Int32Regs,
7358    !eq(ptx_elt_type, "u8") : Int32Regs,
7359    !eq(ptx_elt_type, "s4") : Int32Regs,
7360    !eq(ptx_elt_type, "u4") : Int32Regs,
7361    !eq(ptx_elt_type, "b1") : Int32Regs);
7362
7363  // Instruction input/output arguments for the fragment.
  // Only the length of regs is used: every element is replaced by regclass.
7364  list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
7365
7366  // List of register names for the fragment -- ["ra0", "ra1",...]
  // The prefix is "r" followed by frag; RegSeq appends indices 0 .. N-1.
7367  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
7368
7369  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  // The first name is emitted via !head and the rest are folded in with a
  // ", $" separator, so reg_names must contain at least one element.
7370  string regstring = "{{$" # !head(reg_names)
7371                           # !foldl("", !tail(reg_names), a, b,
7372                                    !strconcat(a, ", $", b))
7373                     # "}}";
7374
7375  // Predicates for particular fragment variant. Technically those are
7376  // per-instruction predicates, but currently all fragments that can be used in
7377  // a given instruction are subject to the same constraints, so an instruction
7378  // can use predicates from any of its fragments. If/when this is no
7379  // longer the case, we can concat all per-fragment predicates to enforce that
7380  // all fragments of the instruction are viable.
  //
  // The arms are tried in order and the first true condition wins. The last
  // two arms test geometry only. There is no default arm, so a combination
  // of geom and ptx_elt_type matching none of the arms gets no value.
7381  list<Predicate> Predicates = !cond(
7382    // fp16 -> fp16/fp32 @ m16n16k16
7383    !and(!eq(geom, "m16n16k16"),
7384         !or(!eq(ptx_elt_type, "f16"),
7385             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
7386
7387    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
7388    !and(!or(!eq(geom, "m8n32k16"),
7389             !eq(geom, "m32n8k16")),
7390         !or(!eq(ptx_elt_type, "f16"),
7391             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
7392
7393    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
7394    !and(!or(!eq(geom,"m16n16k16"),
7395             !eq(geom,"m8n32k16"),
7396             !eq(geom,"m32n8k16")),
7397         !or(!eq(ptx_elt_type, "u8"),
7398             !eq(ptx_elt_type, "s8"),
7399             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
7400
7401    // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1)
7402    !or(!eq(geom,"m8n8k128"),
7403        !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
7404
7405    !eq(geom, "m8n8k4") : [hasSM70, hasPTX64]);
7406
7407  // template DAGs for instruction inputs/output.
  // Both carry the same register classes and names; only the operator
  // (outs vs. ins) differs.
7408  dag Outs = !dag(outs, ptx_regs, reg_names);
7409  dag Ins = !dag(ins, ptx_regs, reg_names);
7410}
7411
7412// Convert dag of arguments into a dag to match given intrinsic.
//   Intr - intrinsic that becomes the operator of the resulting dag.
//   Ins  - operand dag whose operator is 'ins'.
// The result is available as BuildPatternI<...>.ret.
7413class BuildPatternI<Intrinsic Intr, dag Ins> {
7414  // Build a dag pattern that matches the intrinsic call.
  // !foreach walks Ins and applies the nested !subst chain to each item:
  // 'ins' becomes Intr, and imem / MEMri64 / MEMri become ADDRvar /
  // ADDRri64 / ADDRri (presumably the corresponding address-matching
  // patterns; they are defined outside this chunk).
7415  dag ret = !foreach(tmp, Ins,
7416                          !subst(imem, ADDRvar,
7417                          !subst(MEMri64, ADDRri64,
7418                          !subst(MEMri, ADDRri,
7419                          !subst(ins, Intr, tmp)))));
7420}
7421
7422// Same as above, but uses PatFrag instead of an Intrinsic.
//   Intr - PatFrag that becomes the operator of the resulting dag (the
//          parameter keeps the name Intr although it is a PatFrag).
//   Ins  - operand dag whose operator is 'ins'.
// The result is available as BuildPatternPF<...>.ret.
7423class BuildPatternPF<PatFrag Intr, dag Ins> {
7424  // Build a dag pattern that matches the PatFrag.
  // !foreach walks Ins and applies the nested !subst chain to each item:
  // 'ins' becomes Intr, and imem / MEMri64 / MEMri become ADDRvar /
  // ADDRri64 / ADDRri (presumably the corresponding address-matching
  // patterns; they are defined outside this chunk).
7425  dag ret = !foreach(tmp, Ins,
7426                          !subst(imem, ADDRvar,
7427                          !subst(MEMri64, ADDRri64,
7428                          !subst(MEMri, ADDRri,
7429                          !subst(ins, Intr, tmp)))));
7430}
7431
7432// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, given as a string.
//   _Args - list of 'ins' dags that together form the instruction arguments.
// The NVPTXInst base is created with placeholder values: empty outs/ins, "?"
// as the asm string and an empty pattern list. The WMMA_LOAD, WMMA_STORE_D
// and WMMA_MMA subclasses override OutOperandList, InOperandList and
// AsmString with 'let'.
7433class WMMA_INSTR<string _Intr, list<dag> _Args>
7434  : NVPTXInst<(outs), (ins), "?", []> {
  // Look the intrinsic record up by name.
7435  Intrinsic Intr = !cast<Intrinsic>(_Intr);
7436  // Concatenate all arguments into a single dag.
7437  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
7438  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // NOTE(review): Intr is already declared as Intrinsic, so the !cast here
  // looks redundant -- confirm before removing.
7439  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
7440}
7441
7442//
7443// wmma.load.[a|b|c].sync${ptx:aligned}.[row|col].<geom>[|.global|.shared].<ptx_elt_type>
7444//
7445
// Instruction for loading one fragment.
//   Frag       - fragment description; supplies the output registers,
//                predicates, and the frag/geom/ptx_elt_type asm pieces.
//   Layout     - layout string placed in the asm after a '.'.
//   Space      - address-space suffix placed verbatim in the asm: ".shared",
//                ".global", or anything else for the generic matcher.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed after the address.
//   SrcOp      - operand class used for the $src address operand.
7446class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
7447                DAGOperand SrcOp>
7448  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
7449                              [!con((ins SrcOp:$src),
7450                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7451    Requires<Frag.Predicates> {
7452  // Load/store intrinsics are overloaded on pointer's address space.
7453  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7454  // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
7455  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
7456  // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with 'ops' replaced by the intrinsic; the
  // predicate code comes from AS_match, falling back to the generic check
  // when Space is neither ".shared" nor ".global".
7457  PatFrag IntrFrag = PatFrag<PFOperands,
7458                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7459                             !cond(!eq(Space, ".shared"): AS_match.shared,
7460                                   !eq(Space, ".global"): AS_match.global,
7461                                   1: AS_match.generic)>;
7462  // Build AS-constrained pattern.
  // This replaces the unconstrained pattern that WMMA_INSTR built by default.
7463  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7464
  // The loaded fragment registers are the only outputs. $ptx is an extra
  // trailing input that is referenced in the asm string as ${ptx:aligned};
  // WMMA_PAT passes ptx.version for it.
7465  let OutOperandList = Frag.Outs;
7466  let InOperandList = !con(Args, (ins MmaCode:$ptx));
7467  let AsmString = "wmma.load."
7468                  # Frag.frag
7469                  # ".sync"
7470                  # "${ptx:aligned}"
7471                  # "." # Layout
7472                  # "." # Frag.geom
7473                  # Space
7474                  # "." # Frag.ptx_elt_type # " \t"
7475                  # Frag.regstring
7476                  # ", [$src]"
7477                  # !if(WithStride, ", $ldm", "")
7478                  # ";";
7479}
7480
7481//
7482// wmma.store.d.sync${ptx:aligned}.[row|col].<geom>[|.global|.shared].<ptx_elt_type>
7483//
// Instruction for storing one fragment.
//   Frag       - fragment description; supplies the input registers,
//                predicates, and the geom/ptx_elt_type asm pieces.
//   Layout     - layout string placed in the asm after a '.'.
//   Space      - address-space suffix placed verbatim in the asm: ".shared",
//                ".global", or anything else for the generic matcher.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed after the register list.
//   DstOp      - operand class used for the $dst address operand.
// Argument order is: destination address, fragment registers, optional $ldm.
7484class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
7485                   bit WithStride, DAGOperand DstOp>
7486  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
7487               [!con((ins DstOp:$dst),
7488                     Frag.Ins,
7489                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
7490    Requires<Frag.Predicates> {
7491
7492  // Load/store intrinsics are overloaded on pointer's address space.
7493  // To match the right intrinsic, we need to build AS-constrained PatFrag.
7494  // PFOperands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  // One 'node' operand is generated per entry of Frag.regs and named after
  // the corresponding entry of Frag.reg_names.
7495  dag PFOperands = !con((ops node:$dst),
7496                        !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
7497                        !if(WithStride, (ops node:$ldm), (ops)));
7498  // Build PatFrag that only matches particular address space.
  // The fragment body is PFOperands with 'ops' replaced by the intrinsic; the
  // predicate code comes from AS_match, falling back to the generic check
  // when Space is neither ".shared" nor ".global".
7499  PatFrag IntrFrag = PatFrag<PFOperands,
7500                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
7501                             !cond(!eq(Space, ".shared"): AS_match.shared,
7502                                   !eq(Space, ".global"): AS_match.global,
7503                                   1: AS_match.generic)>;
7504  // Build AS-constrained pattern.
  // This replaces the unconstrained pattern that WMMA_INSTR built by default.
7505  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
7506
  // A store produces no register results. $ptx is an extra trailing input
  // that is referenced in the asm string as ${ptx:aligned}; WMMA_PAT passes
  // ptx.version for it.
7507  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
7508  let OutOperandList = (outs);
7509  let AsmString = "wmma.store.d.sync"
7510                  # "${ptx:aligned}"
7511                  # "." # Layout
7512                  # "." # Frag.geom
7513                  # Space
7514                  # "." # Frag.ptx_elt_type
7515                  # " \t[$dst],"
7516                  # Frag.regstring
7517                  # !if(WithStride, ", $ldm", "")
7518                  # ";";
7519}
7520
7521// Create all load/store variants
// Every anonymous def created inside the defset is collected into the list
// MMA_LDSTs, which is later used to generate the selection patterns.
// The nesting enumerates layout x stride x address space x address operand
// class, and for each of those every load fragment and every store fragment.
7522defset list<WMMA_INSTR> MMA_LDSTs  = {
7523  foreach layout = ["row", "col"] in {
7524    foreach stride = [0, 1] in {
      // "" selects the generic address-space matcher in WMMA_LOAD/WMMA_STORE_D.
7525      foreach space = [".global", ".shared", ""] in {
7526        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          // The inner 'foreach _' ignores its loop variable; it presumably
          // acts as a filter, with NVVM_MMA_SUPPORTED<...>.ret being empty
          // for unsupported combinations (defined outside this file; confirm).
7527          foreach frag = NVVM_MMA_OPS.all_ld_ops in
7528            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7529              def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7530          foreach frag = NVVM_MMA_OPS.all_st_ops in
7531            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in
7532              def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>;
7533        } // addr
7534      } // space
7535    } // stride
7536  } // layout
7537} // defset
7538
7539// WMMA.MMA
// Matrix multiply-accumulate instruction built from four fragments.
//   FragA, FragB, FragC - input fragments; their registers form the
//                         instruction arguments, in that order.
//   FragD               - result fragment; its registers are the outputs.
//   ALayout, BLayout    - layout strings placed in the asm after a '.'.
//   Satfinite           - non-zero appends ".satfinite" in the wmma.mma form.
7540class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
7541               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
7542               string ALayout, string BLayout, int Satfinite>
7543  : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record,
7544                             [FragA.Ins, FragB.Ins, FragC.Ins]>,
7545    // Requires does not seem to have effect on Instruction w/o Patterns.
7546    // We set it here anyways and propagate to the Pat<> we construct below.
7547    Requires<FragA.Predicates> {
  // $ptx is an extra trailing input that is referenced in the wmma.mma asm
  // string as ${ptx:aligned}; WMMA_PAT passes ptx.version for it.
7548  let OutOperandList = FragD.Outs;
7549  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the mnemonic. The first matching arm is used:
  //   m8n8k4 geometry : .<D type>.f16.f16.<C type>
  //   s32 result      : .s32.<A type>.<B type>.s32
  //   otherwise       : .<D type>.<C type>
7550  string TypeList = !cond(
7551    !eq(FragD.geom, "m8n8k4") : "." # FragD.ptx_elt_type
7552                                # ".f16.f16."
7553                                # FragC.ptx_elt_type,
7554    !eq(FragD.ptx_elt_type, "s32") : ".s32"
7555                                     # "." # FragA.ptx_elt_type
7556                                     # "." # FragB.ptx_elt_type
7557                                     # ".s32",
7558    1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
7559  );
  // Two asm forms. For m8n8k4 the mnemonic is "mma.sync.aligned.m8n8k4" with
  // ".aligned" hard-coded and no use of Satfinite; every other geometry is
  // printed as "wmma.mma", with ".xor.popc" inserted when the A fragment is
  // b1. In both forms the register lists are printed in the order D, A, B, C.
  // NOTE(review): TypeList tests FragD.geom while this tests FragA.geom --
  // presumably all four fragments always share one geometry; confirm.
7560  let AsmString = !if(!eq(FragA.geom, "m8n8k4"),
7561     "mma.sync.aligned.m8n8k4"
7562        # "." # ALayout
7563        # "." # BLayout
7564        # TypeList # "\n\t\t"
7565        # FragD.regstring # ",\n\t\t"
7566        # FragA.regstring # ",\n\t\t"
7567        # FragB.regstring # ",\n\t\t"
7568        # FragC.regstring # ";",
7569     "wmma.mma"
7570        # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "")
7571        # ".sync"
7572        # "${ptx:aligned}"
7573        # "." # ALayout
7574        # "." # BLayout
7575        # "." # FragA.geom
7576        # TypeList
7577        # !if(Satfinite, ".satfinite", "") # "\n\t\t"
7578        # FragD.regstring # ",\n\t\t"
7579        # FragA.regstring # ",\n\t\t"
7580        # FragB.regstring # ",\n\t\t"
7581        # FragC.regstring # ";");
7582}
7583
// Create all MMA variants. Every anonymous def created inside the defset is
// collected into the list MMAs, which is later used to generate the selection
// patterns. The nesting enumerates A layout x B layout x satfinite x op.
7584defset list<WMMA_INSTR> MMAs  = {
7585  foreach layout_a = ["row", "col"] in {
7586    foreach layout_b = ["row", "col"] in {
7587      foreach satf = [0, 1] in {
7588        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          // The 'foreach _' ignores its loop variable; it presumably acts as
          // a filter, with NVVM_MMA_SUPPORTED<...>.ret being empty for
          // unsupported combinations (defined outside this file; confirm).
7589          foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
            // op[0..3] are passed as the A, B, C and D fragments respectively.
7590            def : WMMA_MMA<WMMA_REGINFO<op[0]>,
7591                           WMMA_REGINFO<op[1]>,
7592                           WMMA_REGINFO<op[2]>,
7593                           WMMA_REGINFO<op[3]>,
7594                           layout_a, layout_b, satf>;
7595          }
7596        } // op
7597      } // satf
7598    } // layout_b
7599  } // layout_a
7600} // defset
7601
7602
7603// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
7604// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
7605// the instruction record.
//
// Selection pattern for one WMMA_INSTR record wi:
//   source - wi.IntrinsicPattern;
//   result - wi.Args with the 'ins' operator replaced by wi itself, followed
//            by ptx.version as the last operand (it fills the MmaCode:$ptx
//            input that the instruction classes append to their operands).
// wi.Predicates is not declared in WMMA_INSTR; presumably it is provided by
// the Requires<...> that each concrete instruction class mixes in.
7606class WMMA_PAT<WMMA_INSTR wi>
7607      : Pat<wi.IntrinsicPattern,
7608            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
7609                 (wi ptx.version))>,
7610        Requires<wi.Predicates>;
7611
7612// Build intrinsic->instruction patterns for all MMA instructions.
// Covers both defset lists: the MMA instructions and the fragment
// load/store instructions. One anonymous WMMA_PAT is emitted per record.
7613foreach mma = !listconcat(MMAs, MMA_LDSTs) in
7614  def : WMMA_PAT<mma>;
7615