xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision c9539b89010900499a200cdd6c0265ea5d950875)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 0.0f;
}]>;
13
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 1.0f;
}]>;
18
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 0.0;
}]>;
23
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 1.0;
}]>;
28
// Predicate code fragments, one per address space, each of which asks
// ChkMemSDNodeAddressSpace whether node N lives in that address space.
def AS_match {
  // Generic address space.
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  // Shared address space.
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  // Global address space.
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides a dag node that is replaced with the PTX version currently in use.
class PTX {
  // Transform that discards the incoming immediate and produces an i32
  // immediate holding the subtarget's PTX version.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    SDLoc DL(N);
    return getI32Imm(Subtarget->getPTXVersion(), DL);
  }]>;
  // The (i32 0) operand is only a placeholder; PTXVerXform rewrites it to the
  // PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;
50
// Generates a list of n sequentially numbered register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
//   n      - number of names to generate; zero or a negative value yields the
//            empty list.
//   prefix - string placed in front of each index.
// The list is built recursively: the names for n-1 followed by
// prefix # (n-1). The recursion is guarded with !gt(n, 0) rather than the
// truthiness of n so that a negative count terminates instead of recursing
// without bound.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(!gt(n, 0),
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}
58
// Lists the values the shfl generator iterates over for its threadmask_imm
// flag: both register and immediate forms when sync is set, otherwise only
// the single "false" entry.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [false, true], [false]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// int_nvvm_barrier0 -> "bar.sync 0".
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// int_nvvm_barrier_n -> "bar.sync" with the single operand in a register.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// int_nvvm_barrier -> "bar.sync" with two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Barrier-0 reductions. Each one emits a braced PTX sequence that first turns
// the i32 $pred into a predicate register (%p1 = $pred != 0) and then feeds
// it to bar.red on barrier 0.

// popc form: the bar.red result is written directly to $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// and form: the predicate result %p2 is converted back to 1/0 with selp.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// or form: the predicate result %p2 is converted back to 1/0 with selp.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// int_nvvm_bar_sync with an immediate operand -> "bar.sync".
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// int_nvvm_bar_warp_sync -> "bar.warp.sync"; immediate (_I) and register (_R)
// operand forms.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
111
// int_nvvm_barrier_sync -> "barrier.sync"; immediate (_I) and register (_R)
// operand forms.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
118
// int_nvvm_barrier_sync_cnt -> "barrier.sync $id, $cnt". The two-letter suffix
// gives the operand kinds in order ($id, $cnt): R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
135
// Describes one shfl / shfl.sync instruction variant.
//   sync           - emit the ".sync" form, which carries an extra
//                    $threadmask operand.
//   mode           - shuffle mode; spelled into both the opcode and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - additionally define a predicate result ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - take the corresponding operand as an i32 immediate
//                    instead of a register.
// The operand lists, assembly string and selection pattern are all computed
// from these flags, so the values handed to NVPTXInst are only placeholders
// that the `let` statements below override.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of the shuffled value.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // Name of the intrinsic this variant selects, e.g. with sync and
  // return_pred set: int_nvvm_shfl_sync_<mode>_<reg>p.
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in intrinsic order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // Note that the assembly places $threadmask last, unlike the operand list.
  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t",
                             "$dst",
                             !if(return_pred, "|$pred", ""), ", ",
                             "$src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // Derive the pattern from the operand lists: (outs ...) becomes (set ...),
  // (ins ...) becomes (Intr ...), and every i32imm operand becomes imm.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}
175
// Instantiate every shfl variant: sync x mode x register type x
// predicate-result x immediate/register choice for each operand. The
// threadmask choice is only varied for the sync forms (see THREADMASK_INFO).
foreach sync = [false, true] in {
  // The predicate set depends only on whether this is the .sync form.
  defvar ShflPreds = !if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL]);

  foreach mode = ["up", "down", "bfly", "idx"] in
  foreach regclass = ["i32", "f32"] in
  foreach return_pred = [false, true] in
  foreach offset_imm = [false, true] in
  foreach mask_imm = [false, true] in
  foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
    def : SHFL_INSTR<sync, mode, regclass, return_pred,
                     offset_imm, mask_imm, threadmask_imm>,
          Requires<ShflPreds>;
}
193
// vote.{all,any,uni,ballot}
// Defines one anonymous instruction that selects IntOp applied to a predicate
// and prints it as "vote.<mode>".
//   regclass - register class of the result.
//   mode     - opcode suffix, including the type (e.g. "all.pred").
//   IntOp    - intrinsic to match.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
// vote.sync.{all,any,uni,ballot}
// Defines two instructions that select IntOp(mask, pred) and print it as
// "vote.sync.<mode>": suffix `i` takes the mask as an immediate, suffix `r`
// takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  defvar AsmText = !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;");
  defvar ReqPreds = [hasPTX60, hasSM30];

  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    AsmText,
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<ReqPreds>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    AsmText,
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<ReqPreds>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype>
// Defines four instructions selecting IntOp(mask, value). In the two-letter
// suffix the first letter describes $value and the second describes $mask
// (i = immediate, r = register).
//   regclass - register class used for a register $value.
//   ptxtype  - type suffix printed after "match.any.sync.".
//   IntOp    - intrinsic to match.
//   ImmOp    - operand type used for an immediate $value.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar AsmText = !strconcat("match.any.sync.", ptxtype,
                              " \t$dest, $value, $mask;");
  defvar ReqPreds = [hasPTX60, hasSM70];

  def ii : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<ReqPreds>;
  def ir : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<ReqPreds>;
  def ri : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<ReqPreds>;
  def rr : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<ReqPreds>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype> with a predicate result ("$dest|$pred").
// Defines four instructions selecting IntOp(mask, value). In the two-letter
// suffix the first letter describes $value and the second describes $mask
// (i = immediate, r = register).
//   regclass - register class used for a register $value.
//   ptxtype  - type suffix printed after "match.all.sync.".
//   IntOp    - intrinsic to match; it yields both $dest and $pred.
//   ImmOp    - operand type used for an immediate $value.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  defvar AsmText = !strconcat("match.all.sync.", ptxtype,
                              " \t$dest|$pred, $value, $mask;");
  defvar ReqPreds = [hasPTX60, hasSM70];

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<ReqPreds>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<ReqPreds>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<ReqPreds>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     AsmText,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<ReqPreds>;
}
defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
// redux.sync.<BinOp>.<PTXType>
// Defines one anonymous instruction selecting Intrin(src, mask) on 32-bit
// registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
292
293} // isConvergent = true
294
295//-----------------------------------
296// Explicit Memory Fence Functions
297//-----------------------------------
// An operand-less instruction that selects the given intrinsic and prints
// StrOp verbatim.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
305
306
307//-----------------------------------
308// Async Copy Functions
309//-----------------------------------
310
// cp.async.mbarrier.arrive[.noinc][.shared].b64
// Defines _32 and _64 variants that differ only in the register class of the
// address operand.
//   NoInc     - "" or ".noinc"; spliced into the opcode.
//   AddrSpace - "" or ".shared"; spliced into the opcode.
//   Intrin    - intrinsic to match.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                   # ".b64 [$addr];";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmText,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmText,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<ReqPreds>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
330
// cp.async.ca.shared.global [$dst], [$src], <cpsize>
// Defines _32 and _64 variants that differ only in the register class of the
// two address operands. cpsize is printed verbatim as the last operand.
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  defvar AsmText = "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      AsmText,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      AsmText,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<ReqPreds>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
350
// cp.async.cg.shared.global [$dst], [$src], <cpsize>
// Defines _32 and _64 variants that differ only in the register class of the
// two address operands. cpsize is printed verbatim as the last operand.
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  defvar AsmText = "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      AsmText,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      AsmText,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<ReqPreds>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
364
// int_nvvm_cp_async_commit_group -> "cp.async.commit_group".
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX70, hasSM80]>;

// int_nvvm_cp_async_wait_group -> "cp.async.wait_group $n"; $n is matched as
// a target immediate (timm).
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX70, hasSM80]>;

// int_nvvm_cp_async_wait_all -> "cp.async.wait_all".
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX70, hasSM80]>;
378
379//-----------------------------------
380// MBarrier Functions
381//-----------------------------------
382
// mbarrier.init[.shared].b64 [$addr], $count
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; $count is always a 32-bit register.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<ReqPreds>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
397
// mbarrier.inval[.shared].b64 [$addr]
// Defines _32 and _64 variants that differ only in the register class of the
// address operand.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmText,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmText,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<ReqPreds>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
412
// mbarrier.arrive[.shared].b64 $state, [$addr]
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; the $state result is always a 64-bit register.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<ReqPreds>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
427
// mbarrier.arrive.noComplete[.shared].b64 $state, [$addr], $count
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; $state is a 64-bit and $count a 32-bit register.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.arrive.noComplete" # AddrSpace
                   # ".b64 $state, [$addr], $count;";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<ReqPreds>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
447
// mbarrier.arrive_drop[.shared].b64 $state, [$addr]
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; the $state result is always a 64-bit register.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.arrive_drop" # AddrSpace
                   # ".b64 $state, [$addr];";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      AsmText,
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<ReqPreds>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
465
// mbarrier.arrive_drop.noComplete[.shared].b64 $state, [$addr], $count
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; $state is a 64-bit and $count a 32-bit register.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.arrive_drop.noComplete" # AddrSpace
                   # ".b64 $state, [$addr], $count;";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmText,
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<ReqPreds>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
486
// mbarrier.test_wait[.shared].b64 $res, [$addr], $state
// Defines _32 and _64 variants that differ only in the register class of the
// address operand; $res is a predicate and $state a 64-bit register.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  defvar AsmText = "mbarrier.test_wait" # AddrSpace
                   # ".b64 $res, [$addr], $state;";
  defvar ReqPreds = [hasPTX70, hasSM80];

  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      AsmText,
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<ReqPreds>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      AsmText,
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<ReqPreds>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
502
// mbarrier.pending_count.b64 $res, $state
// Selects Intrin applied to a 64-bit $state register and yields a 32-bit
// result.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
511
512//-----------------------------------
513// Math Functions
514//-----------------------------------
515
// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
//
// The four patterns below cover both operand orders of the outer fmin and of
// the inner fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
534
// Double-precision min(1.0, max(0.0, a)) -> CVT_f64_f64 with CvtSAT, in all
// four operand orders of the outer fmin and the inner fmax.

// fmin(1.0, fmax(0.0, a))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// fmin(1.0, fmax(a, 0.0))
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// fmin(fmax(0.0, a), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
// fmin(fmax(a, 0.0), 1.0)
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
547
548
// One-input math instruction: $dst = IntOP($src0).
// OpcStr is the complete assembly string (not just the mnemonic) because some
// users, such as INT_PTX_RECIP, need full control over it.
//   target_regclass - register class of $dst.
//   src_regclass    - register class of $src0.
//   IntOP           - intrinsic to match.
//   Preds           - predicates required for selection (none by default).
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;
557
// Two-input math instruction: $dst = IntOP($src0, $src1).
// OpcStr is the complete assembly string (not just the mnemonic) because some
// users, such as INT_PTX_NATIVE_POWR_F, need full control over it.
//   t_regclass              - register class of $dst.
//   s0_regclass/s1_regclass - register classes of $src0 and $src1.
//   IntOP                   - intrinsic to match.
//   Preds                   - predicates required for selection (none by
//                             default).
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
      Requires<Preds>;
568
// Three-input math instruction: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete assembly string.
//   t_regclass       - register class of $dst.
//   s0/s1/s2_regclass - register classes of $src0, $src1 and $src2.
//   IntOP            - intrinsic to match.
//   Preds            - predicates required for selection (none by default).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>,
      Requires<Preds>;
578
579//
580// MISC
581//
582
// int_nvvm_prmt -> "prmt.b32" on three 32-bit register inputs.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
585
586//
587// Min Max
588//
589
// f32 min and its .ftz / .NaN / .xorsign.abs variants. Each def maps one
// int_nvvm_fmin_* intrinsic to the matching "min.*.f32" assembly string.

// Plain and flush-to-zero forms: no extra predicates.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// .NaN forms: require hasPTX70 and hasSM80.
def INT_NVVM_FMIN_NAN_F
    : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
               [hasPTX70, hasSM80]>;
def INT_NVVM_FMIN_FTZ_NAN_F
    : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
               [hasPTX70, hasSM80]>;

// .xorsign.abs forms: require hasPTX72 and hasSM86.
def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
616
// f32 max and its .ftz / .NaN / .xorsign.abs variants. Each def maps one
// int_nvvm_fmax_* intrinsic to the matching "max.*.f32" assembly string.

// Plain and flush-to-zero forms: no extra predicates.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// .NaN forms: require hasPTX70 and hasSM80.
def INT_NVVM_FMAX_NAN_F
    : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
               [hasPTX70, hasSM80]>;
def INT_NVVM_FMAX_FTZ_NAN_F
    : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
               [hasPTX70, hasSM80]>;

// .xorsign.abs forms: require hasPTX72 and hasSM86.
def INT_NVVM_FMAX_XORSIGN_ABS_F
    : F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_xorsign_abs_f,
               [hasPTX72, hasSM86]>;
643
// f64 min / max: int_nvvm_fmin_d -> "min.f64", int_nvvm_fmax_d -> "max.f64".
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
648
649//
650// Min Max f16, f16x2, bf16, bf16x2
651//
652
// Plain data record bundling the parameters of one min/max variant.
//   V     - variant suffix string, stored as Variant.
//   I     - intrinsic for the variant, stored as Intr.
//   RC    - register class for the variant, stored as RegClass.
//   Preds - required predicates, stored as Predicates; defaults to
//           [hasPTX70, hasSM80] when not given.
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX70, hasSM80]> {
  string          Variant    = V;
  Intrinsic       Intr       = I;
  NVPTXRegClass   RegClass   = RC;
  list<Predicate> Predicates = Preds;
}
660
// Emits one F_MATH_2 record per row of the variant table below.
//
// IntName is used twice:
//   * as the leading mnemonic of the asm string, followed by the row's
//     Variant with every "_" replaced by ".";
//   * to choose the intrinsic: the fmin intrinsic when IntName is "min",
//     the fmax intrinsic for any other value.
//
// Rows without an explicit predicate list use MIN_MAX_TUPLE's default
// ([hasPTX70, hasSM80]); the xorsign.abs rows pass [hasPTX72, hasSM86].
multiclass MIN_MAX<string IntName> {
  foreach T = [
    // ---- f16 (Float16Regs) ----
    MIN_MAX_TUPLE<"_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16, int_nvvm_fmax_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16,
          int_nvvm_fmax_ftz_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16,
          int_nvvm_fmax_nan_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,

    // ---- f16x2 (Float16x2Regs) ----
    MIN_MAX_TUPLE<"_f16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,

    // ---- bf16 (operands live in Int16Regs) ----
    MIN_MAX_TUPLE<"_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16,
          int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX72, hasSM86]>,

    // ---- bf16x2 (operands live in Int32Regs) ----
    MIN_MAX_TUPLE<"_bf16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16x2,
          int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX72, hasSM86]>
  ] in {
    // Record name is the defm prefix followed by the row's Variant; all three
    // operands share the row's register class.
    def T.Variant
        : F_MATH_2<IntName # !subst("_", ".", T.Variant)
                     # " \t$dst, $src0, $src1;",
                   T.RegClass, T.RegClass, T.RegClass, T.Intr, T.Predicates>;
  }
}
730
// Instantiate the f16 / f16x2 / bf16 / bf16x2 min and max variants.
// The "max" instantiation uses the INT_NVVM_FMAX prefix so its records are
// named consistently with the INT_NVVM_FMIN ones.  The prefix was previously
// misspelled INT_NVVM_FMAN; the generated suffixes are lower/mixed case, so
// the corrected names do not clash with the upper-case INT_NVVM_FMAX_* defs.
defm INT_NVVM_FMIN : MIN_MAX<"min">;
defm INT_NVVM_FMAX : MIN_MAX<"max">;
733
734//
735// Multiplication
736//
737
// mul.hi for signed / unsigned 32-bit operands.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi for signed / unsigned 64-bit operands.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding mode (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding mode.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo for signed / unsigned 32-bit operands.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
778
779//
780// Div
781//
782
// Approximate f32 division, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 division with an explicit rounding mode (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 division with an explicit rounding mode.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
814
815//
816// Sad
817//
818
// Three-operand sad.s32 / sad.u32; all operands and the result use Int32Regs.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
823
824//
825// Floor  Ceil
826//
827
// floor: rewritten to a same-type CVT with the CvtRMI / CvtRMI_FTZ mode.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 $a, CvtRMI)>;

// ceil: rewritten to a same-type CVT with the CvtRPI / CvtRPI_FTZ mode.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 $a, CvtRPI)>;
841
842//
843// Abs
844//
845
// abs for f32 (with and without .ftz) and for f64.
def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;
853
854//
855// Abs, Neg bf16, bf16x2
856//
857
// abs / neg for bf16 (Int16Regs) and bf16x2 (Int32Regs).
// All four carry the predicate list [hasPTX70, hasSM80].
def INT_NVVM_ABS_BF16
    : F_MATH_1<"abs.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>;
def INT_NVVM_ABS_BF16X2
    : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>;
def INT_NVVM_NEG_BF16
    : F_MATH_1<"neg.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>;
def INT_NVVM_NEG_BF16X2
    : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>;
866
867//
868// Round
869//
870
// round: rewritten to a same-type CVT with the CvtRNI / CvtRNI_FTZ mode.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 $a, CvtRNI)>;
877
878//
879// Trunc
880//
881
// trunc: rewritten to a same-type CVT with the CvtRZI / CvtRZI_FTZ mode.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 $a, CvtRZI)>;
888
889//
890// Saturate
891//
892
// saturate: rewritten to a same-type CVT with the CvtSAT / CvtSAT_FTZ mode.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 $a, CvtSAT)>;
899
900//
901// Exp2  Log2
902//
903
// ex2.approx for f32 (with/without .ftz), f64, f16 and f16x2.
// The f16 / f16x2 forms carry the predicate list [hasPTX70, hasSM75].
// NOTE(review): the PTX ISA appears to document ex2.approx / lg2.approx only
// for f32 (plus half-precision ex2) -- confirm the .f64 forms below are
// accepted by the assembler.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16
    : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
               Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16,
               [hasPTX70, hasSM75]>;
def INT_NVVM_EX2_APPROX_F16X2
    : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
               Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2,
               [hasPTX70, hasSM75]>;

// lg2.approx for f32 (with/without .ftz) and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
921
922//
923// Sin  Cos
924//
925
// sin.approx for f32, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

// cos.approx for f32, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
935
936//
937// Fma
938//
939
// One row of the fma variant table consumed by the FMA_INST multiclass.
//   V     - record-name suffix of the variant (e.g. "_rn_ftz_f32").
//   I     - intrinsic associated with the variant.
//   RC    - register class used for the variant's operands.
//   Preds - predicate list; empty when omitted.
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  // The template arguments are simply re-exported as named fields.
  list<Predicate> Predicates = Preds;
  NVPTXRegClass RegClass = RC;
  Intrinsic Intr = I;
  string Variant = V;
}
947
// Emits one F_MATH_3 record per row of the variant table below.  The asm
// mnemonic is "fma" followed by the row's Variant with every "_" replaced by
// "."; the result and all three sources share the row's register class.
multiclass FMA_INST {
  foreach T = [
    // ---- f64 ----
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // ---- f32 ----
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32",     int_nvvm_fma_rn_f,     Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32",     int_nvvm_fma_rz_f,     Float32Regs>,
    FMA_TUPLE<"_rm_f32",     int_nvvm_fma_rm_f,     Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32",     int_nvvm_fma_rp_f,     Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // ---- f16: plain/sat rows use [hasPTX42, hasSM53], relu rows use
    //      [hasPTX70, hasSM80] ----
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16,
      Float16Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16,
      Float16Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16,
      Float16Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16,
      Float16Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16,
      Float16Regs, [hasPTX70, hasSM80]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16,
      Float16Regs, [hasPTX70, hasSM80]>,

    // ---- f16x2: same predicate split as f16 ----
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2,
      Float16x2Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2,
      Float16x2Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2,
      Float16x2Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
      Float16x2Regs, [hasPTX42, hasSM53]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2,
      Float16x2Regs, [hasPTX70, hasSM80]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
      Float16x2Regs, [hasPTX70, hasSM80]>,

    // ---- bf16 (operands live in Int16Regs) ----
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16,
      Int16Regs, [hasPTX70, hasSM80]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16,
      Int16Regs, [hasPTX70, hasSM80]>,

    // ---- bf16x2 (operands live in Int32Regs) ----
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2,
      Int32Regs, [hasPTX70, hasSM80]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2,
      Int32Regs, [hasPTX70, hasSM80]>
  ] in {
    // Record name is the defm prefix followed by the row's Variant.
    def T.Variant
        : F_MATH_3<"fma" # !subst("_", ".", T.Variant)
                     # " \t$dst, $src0, $src1, $src2;",
                   T.RegClass, T.RegClass, T.RegClass, T.RegClass,
                   T.Intr, T.Predicates>;
  }
}

// Instantiate every fma variant under the INT_NVVM_FMA prefix.
defm INT_NVVM_FMA : FMA_INST;
1006
1007//
1008// Rcp
1009//
1010
// f32 reciprocal with an explicit rounding mode (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal with an explicit rounding mode.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate reciprocal (.ftz only) for f32 and f64.
def INT_NVVM_RCP_APPROX_FTZ_F
    : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
1041
1042//
1043// Sqrt
1044//
1045
// f32 square root with an explicit rounding mode (rn/rz/rm/rp) or .approx,
// each with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root with an explicit rounding mode.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;
1075
1076// nvvm_sqrt intrinsic
// All four patterns match int_nvvm_sqrt_f and differ only in their Requires
// list; they are kept in the original order, from most to least constrained:
//   doF32FTZ + do_SQRTF32_RN -> sqrt.rn.ftz.f32
//   do_SQRTF32_RN            -> sqrt.rn.f32
//   doF32FTZ                 -> sqrt.approx.ftz.f32
//   (no predicate)           -> sqrt.approx.f32
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), (INT_NVVM_SQRT_RN_FTZ_F $a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), (INT_NVVM_SQRT_RN_F $a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), (INT_NVVM_SQRT_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
1085
1086//
1087// Rsqrt
1088//
1089
// rsqrt.approx for f32 (with and without .ftz) and for f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
1097
1098//
1099// Add
1100//
1101
// f32 add with an explicit rounding mode (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with an explicit rounding mode.
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
1127
1128//
1129// Convert
1130//
1131
// f64 -> f32: each d2f intrinsic becomes CVT_f32_f64 with the matching
// CvtRN / CvtRZ / CvtRM / CvtRP mode (or its _FTZ counterpart).
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 $a, CvtRP)>;
1148
// f64 -> s32, using the CvtRNI / CvtRZI / CvtRMI / CvtRPI modes.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 $a, CvtRPI)>;

// f64 -> u32, same set of modes.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 $a, CvtRPI)>;
1166
// s32 -> f64, using the CvtRN / CvtRZ / CvtRM / CvtRP modes.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 $a, CvtRP)>;

// u32 -> f64, same set of modes.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 $a, CvtRP)>;
1184
// f32 -> s32, using the CvtRNI / CvtRZI / CvtRMI / CvtRPI modes and their
// _FTZ counterparts.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 $a, CvtRPI)>;

// f32 -> u32, same set of modes.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 $a, CvtRPI)>;
1218
// s32 -> f32, using the CvtRN / CvtRZ / CvtRM / CvtRP modes.
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 $a, CvtRP)>;

// u32 -> f32, same set of modes.
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 $a, CvtRP)>;
1236
// Two f32 values -> bf16x2, with rn / rz rounding and optional relu.
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;

// Two f32 values -> f16x2, same set of modes.
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;

// Single f32 -> bf16, same set of modes.
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 $a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
          (CVT_bf16_f32 $a, CvtRZ_RELU)>;
1263
// f32 -> tf32 via cvt.rna.tf32.f32; the result is held in an Int32Regs
// register and the instruction is tied directly to int_nvvm_f2tf32_rna.
def CVT_tf32_f32
    : NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
                "cvt.rna.tf32.f32 \t$dest, $a;",
                [(set Int32Regs:$dest,
                      (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
1268
// Packs two 32-bit registers into one 64-bit float register with mov.b64.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Extract one 32-bit half of a 64-bit float register.  mov.b64 unpacks $src0
// into a two-register vector; %temp, a scratch .b32 register declared inside
// the emitted braces, receives the half that is not wanted.  The LO form
// places $dst first in the vector, the HI form places it second.
def INT_NVVM_D2I_LO
    : F_MATH_1<"{{\n\t"
               # ".reg .b32 %temp; \n\t"
               # "mov.b64 \t{$dst, %temp}, $src0;\n\t"
               # "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
    : F_MATH_1<"{{\n\t"
               # ".reg .b32 %temp; \n\t"
               # "mov.b64 \t{%temp, $dst}, $src0;\n\t"
               # "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1284
// f32 -> s64, using the CvtRNI / CvtRZI / CvtRMI / CvtRPI modes and their
// _FTZ counterparts.
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 $a, CvtRPI)>;

// f32 -> u64, same set of modes.
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 $a, CvtRPI)>;
1318
// f64 -> s64, using the CvtRNI / CvtRZI / CvtRMI / CvtRPI modes.
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 $a, CvtRPI)>;

// f64 -> u64, same set of modes.
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 $a, CvtRPI)>;
1336
// s64 -> f32 conversion intrinsics: int_nvvm_ll2f_* selects CVT_f32_s64 with
// the Cvt{RN,RZ,RM,RP} mode operand of the same suffix.
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32 conversion intrinsics: int_nvvm_ull2f_* selects CVT_f32_u64.
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1354
// s64 -> f64 conversion intrinsics: int_nvvm_ll2d_* selects CVT_f64_s64 with
// the Cvt{RN,RZ,RM,RP} mode operand of the same suffix.
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64 conversion intrinsics: int_nvvm_ull2d_* selects CVT_f64_u64.
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1372
1373
// f32 -> f16 conversion intrinsics. The CVT_f16_f32 result is wrapped in
// BITCONVERT_16_F2I before being used as the intrinsic's result.
// NOTE(review): presumably because the intrinsic returns the half value as a
// 16-bit integer -- confirm against the intrinsic definition.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1378
//
// Bitcast
//
// Same-width register moves between the integer and floating-point register
// classes: "mov.b32" for the 32-bit pair and "mov.b64" for the 64-bit pair.
// Each instruction is built from F_MATH_1 with (asm, destination class,
// source class, intrinsic).

def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
  Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
  Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
  Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
  Float64Regs, int_nvvm_bitcast_d2ll>;
1392
//
// FNS
//

// Base class for the "fns.b32" instruction forms.
//   ins      - input operand list; must name its operands $mask, $base and
//              $offset, because the asm string refers to them by name.
//   Operands - selection DAG whose result is assigned to the Int32Regs $dst.
// All forms require both hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
               "fns.b32 \t$dst, $mask, $base, $offset;",
               [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX60, hasSM30]>;
1402
// int_nvvm_fns instruction forms for every register/immediate combination of
// the three operands. The three-letter suffix gives the kind of $mask, $base
// and $offset in that order: 'r' = Int32Regs register, 'i' = i32imm immediate.
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
                     (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
                     (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1419
//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach an address-space predicate to an atomic
// fragment. The predicate code comes from AS_match, which calls
// ChkMemSDNodeAddressSpace on the matched node with the global, shared or
// generic address space respectively.
//   ops  - operand list of the fragment.
//   frag - the DAG fragment to match.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.generic>;
1430
// Two-operand atomic (address + one value) for a single pointer width.
//   ptrclass - register class holding the address ($addr).
//   regclass - register class of the value operand and of the result.
//   SpaceStr, OpcStr, TypeStr - concatenated, in that order, after "atom" to
//              form the mnemonic (e.g. ".global" # ".add" # ".u32").
//   IntOp    - fragment matched as (IntOp addr, value).
//   IMMType / IMM - operand type and DAG node used for the immediate form.
//   Pred     - predicates required by both forms.
// Defines `reg` (value in a register) and `imm` (value as an immediate).
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Two-operand atomic for both pointer widths: instantiates F_ATOMIC_2_imp
// once with Int32Regs addresses (`p32`) and once with Int64Regs addresses
// (`p64`). All other arguments are forwarded unchanged; Pred defaults to no
// predicates.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
}
1451
// has 2 operands, neg the second one
//
// Emits a braced PTX block that declares a scratch register `temp` of type
// .s<TypeStr>, writes neg.s<TypeStr> of $b into it, and then issues
// "atom<SpaceStr><OpcStr>.u<TypeStr>" with `temp` as the value operand.
// TypeStr is therefore the bare bit width ("32" or "64"), without a leading
// dot or type letter. Only a register form is defined.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat(
      "{{ \n\t",
      ".reg \t.s", TypeStr, " temp; \n\t",
      "neg.s", TypeStr, " \ttemp, $b; \n\t",
      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
      "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
}
// Negating two-operand atomic for both pointer widths: instantiates
// F_ATOMIC_2_NEG_imp with Int32Regs addresses (`p32`) and Int64Regs addresses
// (`p64`). Pred defaults to no predicates.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
}
1473
// has 3 operands
//
// Three-operand atomic (address + two values, $b and $c) for a single pointer
// width. The mnemonic is "atom" # SpaceStr # OpcStr # TypeStr. Four forms are
// defined, one per register/immediate combination of the value operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType in the operand list and are matched with the
// `imm` node in the pattern.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  Requires<Pred>;
}
// Three-operand atomic for both pointer widths: instantiates F_ATOMIC_3_imp
// with Int32Regs addresses (`p32`) and Int64Regs addresses (`p64`). Pred
// defaults to no predicates.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
}
1509
// atom_add
//
// Address-space-qualified fragments (_g = global, _s = shared, _gen = generic)
// over atomic_load_add_32 / atomic_load_add_64, plus unsuffixed-width
// fragments over atomic_load_fadd that are shared by the f32 and f64
// instructions below.

def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;

// Integer add: ".u32" / ".u64" with the ".add" opcode.
// NOTE(review): each *_GEN_*_USE_G variant matches the same generic fragment
// with the same (empty) predicate list as its *_GEN_* sibling and differs
// only in emitting the ".global" space string -- confirm which one is
// expected to be selected.
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
  atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
  atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
  atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".add", atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
  atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
  atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
  atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".add", atomic_load_add_64_gen, i64imm, imm>;

// Floating-point add, f32: immediates are matched with fpimm.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
  atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
  atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
  atomic_load_add_gen, f32imm, fpimm>;

// Floating-point add, f64: additionally gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
  atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
  atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
  atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1562
// atom_sub
//
// Address-space-qualified fragments over atomic_load_sub_32 /
// atomic_load_sub_64. The instructions are built from F_ATOMIC_2_NEG with the
// ".add" opcode, i.e. subtraction is emitted as an atomic add of the negated
// operand. TypeStr is the bare width ("32"/"64") as that multiclass expects.

def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
  atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
  atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
  atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
  ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
  atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
  atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
  atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
  ".add", atomic_load_sub_64_gen>;
1594
// atom_swap
//
// Address-space-qualified fragments over atomic_swap_32 / atomic_swap_64,
// selected to "atom[.space].exch.b32/.b64" via F_ATOMIC_2.

def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".exch", atomic_swap_64_gen, i64imm, imm>;
1626
// atom_max
//
// Address-space-qualified fragments over the signed (atomic_load_max_*) and
// unsigned (atomic_load_umax_*) nodes. Signed forms use the ".s32"/".s64"
// type strings and unsigned forms ".u32"/".u64", all with the ".max" opcode.
// Every 64-bit instruction is gated on hasSM32.

def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
  atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
  atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
1686
// atom_min
//
// Address-space-qualified fragments over the signed (atomic_load_min_*) and
// unsigned (atomic_load_umin_*) nodes. Signed forms use the ".s32"/".s64"
// type strings and unsigned forms ".u32"/".u64", all with the ".min" opcode.
// Every 64-bit instruction is gated on hasSM32.

def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
  atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
  atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
1746
// atom_inc  atom_dec
//
// Unlike the other atomics in this section, these fragments wrap the
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32 intrinsics rather
// than a generic atomic_* node. Only 32-bit forms are defined; they emit
// "atom[.space].inc.u32" and "atom[.space].dec.u32".

def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1778
// atom_and
//
// Address-space-qualified fragments over atomic_load_and_32 /
// atomic_load_and_64, selected to "atom[.space].and.b32/.b64" via F_ATOMIC_2.
// The 64-bit instructions are gated on hasSM32.

def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
  atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
  atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
  atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
1810
// atom_or
//
// Address-space-qualified fragments over atomic_load_or_32 /
// atomic_load_or_64, selected to "atom[.space].or.b32/.b64" via F_ATOMIC_2.
// The 64-bit instructions are gated on hasSM32.

def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
  atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
  atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
  atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
1842
// atom_xor
//
// Address-space-qualified fragments over atomic_load_xor_32 /
// atomic_load_xor_64, selected to "atom[.space].xor.b32/.b64" via F_ATOMIC_2.
// The 64-bit instructions are gated on hasSM32.

def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
  atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
  atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
  atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
1874
// atom_cas
//
// Address-space-qualified three-operand fragments over atomic_cmp_swap_32 /
// atomic_cmp_swap_64, selected to "atom[.space].cas.b32/.b64" via F_ATOMIC_3.
// No extra predicates are attached to either width.

def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

// NOTE(review): the *_GEN_*_USE_G variants match the same generic fragment
// with the same predicates as the *_GEN_* ones -- confirm which is selected.
defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm>;
1906
// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction record for the 2- and 3-operand
// scoped atomics:
//   AsmStr   - complete assembly string for the instruction.
//   regclass - register class of the single output, named $result.
//   Preds    - predicates required for the instruction.
//   ins      - input operand list.
//   Operands - selection DAG whose value is assigned to $result.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
      : NVPTXInst<(outs regclass:$result), ins,
                  AsmStr,
                  [(set regclass:$result, Operands)]>,
        Requires<Preds>;
1919
// Define instruction variants for all addressing modes.
//
// Two-operand scoped atomic: emits four anonymous ATOM23_impl records for
// intrinsic Intr, covering {Int32Regs, Int64Regs} addresses ($src) crossed
// with {register, immediate} value operands ($b).
//   ImmType - operand type used for the immediate in the `ins` list.
//   Imm     - DAG node matched for the immediate.
//   ImmTy   - value type the immediate is explicitly cast to in the pattern.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // The register forms carry AddedComplexity = 1; the immediate forms below
  // are left at the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}
1943
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // One instruction per register/immediate combination of $b and $c, each
  // emitted for a 32-bit and a 64-bit address register in $src.

  // $b and $c both in registers.
  let AddedComplexity = 2 in
  foreach ptr = [Int32Regs, Int64Regs] in
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins ptr:$src, regclass:$b, regclass:$c),
                      (Intr ptr:$src, regclass:$b, regclass:$c)>;

  // Exactly one of $b / $c is an immediate.
  let AddedComplexity = 1 in {
    foreach ptr = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptr:$src, ImmType:$b, regclass:$c),
                        (Intr ptr:$src, (ImmTy Imm:$b), regclass:$c)>;
    foreach ptr = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptr:$src, regclass:$b, ImmType:$c),
                        (Intr ptr:$src, regclass:$b, (ImmTy Imm:$c))>;
  }

  // $b and $c both immediates.
  foreach ptr = [Int32Regs, Int64Regs] in
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins ptr:$src, ImmType:$b, ImmType:$c),
                      (Intr ptr:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1978
1979// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Assembly:  atom[.<space>][.<scope>].<op>.<type> $result, [$src], $b
  //            (".<space>" is omitted for "gen", ".<scope>" for "gpu").
  // Intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], resolved by
  //            name ("_<scope>" is omitted when ScopeStr is empty).
  defm : ATOM2P_impl<
             !strconcat(
                 "atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
             !cast<Intrinsic>(
                 !strconcat(
                     "int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
             regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2N_impl: builds the asm string and looks
// up the intrinsic by name, then forwards to ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Assembly:  atom[.<space>][.<scope>].<op>.<type> $result, [$src], $b, $c
  //            (".<space>" is omitted for "gen", ".<scope>" for "gpu").
  // Intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
  //            ("_<scope>" is omitted when ScopeStr is empty).
  defm : ATOM3P_impl<
             !strconcat(
                 "atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
             !cast<Intrinsic>(
                 !strconcat(
                     "int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
             regclass, ImmType, Imm, ImmTy, Preds>;
}
2008
2009// Constructs variants for different address spaces.
2010// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Only the "gen" address space is instantiated here.
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2A_impl.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Only the "gen" address space is instantiated here.
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}
2023
2024// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Only the "cta" and "sys" scopes are instantiated. The default ".gpu"
  // scope is already handled by the existing atomics that carry no explicit
  // scope. Both variants add hasAtomScope to the caller's predicates.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
// Three-operand counterpart of ATOM2S_impl.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // ".gpu"-scoped atomics are not defined here: they behave the same as the
  // regular, non-scoped atomics defined elsewhere. Both variants add
  // hasAtomScope to the caller's predicates.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}
2049
2050// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  // Integer forms use the "i" intrinsic family.
  // NOTE(review): _s32 and _u32 pass the same intrinsic type string ("i"),
  // register class and value type, so both resolve to the same intrinsic;
  // only the asm type suffix differs. Confirm the duplication is intended.
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;

  // Floating-point forms use the "f" intrinsic family; the f64 form is
  // additionally gated on hasAtomAddF64.
  defm _f32
    : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
    : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                  [hasAtomAddF64]>;
}
2060
2061// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  // The 64-bit form is additionally gated on hasAtomBitwise64.
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                  [hasAtomBitwise64]>;
}
2067
2068// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  // 32- and 64-bit forms; neither adds predicates of its own.
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
2073
2074// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  // 32-bit forms.
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;

  // 64-bit forms, additionally gated on hasAtomMinMax64.
  defm _s64
    : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
}
2083
2084// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  // Only a u32 form is defined.
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}
2088
2089// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  // Three-operand atomics in 32- and 64-bit forms.
  defm _b32
    : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
2094
// Instantiate the scoped atomic families, one defm per operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
2105
2106//-----------------------------------
2107// Support for ldu on sm_20 or later
2108//-----------------------------------
2109
2110// Don't annotate ldu instructions as mayLoad, as they load from memory that is
2111// read-only in a kernel.
2112
2113// Scalar
2114
// Scalar "ldu.global.<TyStr>" instructions, one per address operand kind
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64). TyStr supplies the type
// suffix and the operand text. No selection pattern is attached, and every
// variant requires hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>,
      Requires<[hasLDU]>;
}
2132
// Scalar ldu instantiations. The i8 and i16 forms both produce their result
// in Int16Regs.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
2143
2144// vector
2145
2146// Elementized vector ldu
// Two-result "ldu.global.<TyStr>" instructions, one per address operand kind.
// No selection pattern is attached.
// NOTE(review): unlike the scalar LDU_G variants, these carry no
// Requires<[hasLDU]>; confirm that is intentional.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
2164
// Four-result "ldu.global.<TyStr>" instructions, one per address operand
// kind. No selection pattern is attached.
// NOTE(review): unlike the scalar LDU_G variants, these carry no
// Requires<[hasLDU]>; confirm that is intentional.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
2182
// Two-element vector ldu instantiations.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu instantiations.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2216
2217
2218//-----------------------------------
2219// Support for ldg on sm_35 or later
2220//-----------------------------------
2221
2222// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2223// non-coherent texture cache, and therefore the values read must be read-only
2224// during the lifetime of the kernel.
2225
// Scalar "ld.global.nc.<TyStr>" instructions, one per address operand kind
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64). TyStr supplies the type
// suffix and the operand text. No selection pattern is attached, and every
// variant requires hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}
2243
// Scalar ldg instantiations. The i8 and i16 forms both produce their result
// in Int16Regs.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2264
2265// vector
2266
2267// Elementized vector ldg
// Two-result "ld.global.nc.<TyStr>" instructions, one per address operand
// kind. No selection pattern is attached.
// NOTE(review): unlike the scalar LDG_G variants, these carry no
// Requires<[hasLDG]>; confirm that is intentional.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2285
// Four-result "ld.global.nc.<TyStr>" instructions, one per address operand
// kind. No selection pattern is attached.
// NOTE(review): unlike the scalar LDG_G variants, these carry no
// Requires<[hasLDG]>; confirm that is intentional.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2303
2304// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ldg instantiations.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;
2333
2334
// "cvta.<Str>" instructions selected for intrinsic Intrin (instantiated in
// this file with the int_nvvm_ptr_*_to_gen intrinsics).
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  // 32-bit source, 32-bit result.
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  // 64-bit source, 64-bit result.
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  // 32-bit source, 64-bit result (useShortPtr only): the source is first
  // widened with cvt.u64.u32 into a scratch register, then converted.
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvt.u64.u32 \t%tmp, $src;\n\t",
                           "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2349
// "cvta.to.<Str>" instructions selected for intrinsic Intrin (instantiated in
// this file with the int_nvvm_ptr_gen_to_* intrinsics).
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  // 32-bit source, 32-bit result.
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  // 64-bit source, 64-bit result.
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  // 64-bit source, 32-bit result (useShortPtr only): the conversion is done
  // into a 64-bit scratch register, then narrowed with cvt.u32.u64.
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           "  cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2364
// Instructions for the int_nvvm_ptr_<space>_to_gen intrinsics.
defm cvta_local  : NG_TO_G<"local",  int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const",  int_nvvm_ptr_constant_to_gen>;

// Instructions for the int_nvvm_ptr_gen_to_<space> intrinsics.
defm cvta_to_local  : G_TO_NG<"local",  int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const",  int_nvvm_ptr_gen_to_constant>;
2374
2375
2376// nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is emitted as a plain register move, in 32- and
// 64-bit widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2387
2388
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected to a single mov of the matching
// width/type from $s into $r.

// Integer moves.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;

// Floating-point moves.
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;

// Pointer moves; int_nvvm_move_ptr is matched at both register widths.
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2418
2419// @TODO: Are these actually needed, or will we always just see symbols
2420// copied to registers first?
2421/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2422                             "mov.u32 \t$r, $s;",
2423                             [(set Int32Regs:$r,
2424                             (int_nvvm_move_ptr texternalsym:$s))]>;
2425def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2426                             "mov.u64 \t$r, $s;",
2427                             [(set Int64Regs:$r,
2428                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2429
2430
2431// MoveParam        %r1, param
2432// ptr_local_to_gen %r2, %r1
2433// ptr_gen_to_local %r3, %r2
2434// ->
2435// mov %r1, param
2436
2437// @TODO: Revisit this.  There is a type
2438// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2439// instructions are not currently defined. However, we can use the ptr
2440// variants and the asm printer will do the right thing.
// Fold local->generic->local applied to a MoveParam of an external symbol
// into a single pointer move of that symbol (64-bit form first, then 32-bit).
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2447
// 64-bit mov of an imem operand into a register; no selection pattern is
// attached.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result),
              (ins imem:$src),
              "mov.u64 \t$result, $src;",
              []>;
2451
2452//-----------------------------------
2453// Compiler Error Warn
2454// - Just ignore them in codegen
2455//-----------------------------------
2456
// The asm string of each instruction is just a "//" comment line naming the
// intrinsic. Each intrinsic is matched with a 32-bit and a 64-bit operand.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2469
2470
2471// isspacep
2472
// isspacep.<space>: each instruction writes an Int1Regs result $d from the
// address in $a, with a 32-bit and a 64-bit address form per space.

// const (these two forms additionally require hasPTX31).
def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;

// global
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

// local
def ISSPACEP_LOCAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

// shared
def ISSPACEP_SHARED_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2507
2508
2509// Special register reads
// 32-bit mov from a SpecialRegs operand into an Int32Regs register. It has no
// selection pattern of its own; the Pat<> records that follow reference it.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;",
              []>;
2513
// Select int_nvvm_read_ptx_sreg_envreg<N>, for N = 0..31, to a MOV_SPECIAL of
// the matching ENVREG<N> register. Both records are looked up by name.
foreach i = 0...31 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2546
2547
2548// rotate builtin support
2549
// int_nvvm_rotate_b32 with an immediate amount, for targets where hasHWROT32
// holds: emits shf.l.wrap.b32 with $src supplied as both data operands.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]>;
2557
// int_nvvm_rotate_b32 with the amount in a register, for targets where
// hasHWROT32 holds: emits shf.l.wrap.b32 with $src supplied as both data
// operands.
def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
          (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]>;
2565
// Fallbacks for int_nvvm_rotate_b32 when noHWROT32 holds: route the intrinsic
// to the *_sw rotate instructions instead of shf.

// Immediate amount: ROT32imm_sw receives $amt and SUB_FRM_32 applied to $amt.
def : Pat<
    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
    (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
  Requires<[noHWROT32]>;

// Register amount.
def : Pat<
    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
    (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
  Requires<[noHWROT32]>;
2573
// Extract one 32-bit half of a 64-bit register. Each instruction prints a
// braced scope that declares a scratch register %dummy and unpacks $src with
// a vector mov.b64; the unwanted half lands in %dummy. Neither has a
// selection pattern; they are referenced from explicit Pat records.
let hasSideEffects = false in {
  // $dst takes the first element of the unpacked pair.
  def GET_LO_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t" #
      ".reg .b32 %dummy;\n\t" #
      "mov.b64 \t{$dst,%dummy}, $src;\n\t" #
      "}}",
      []>;

  // $dst takes the second element of the unpacked pair.
  def GET_HI_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t" #
      ".reg .b32 %dummy;\n\t" #
      "mov.b64 \t{%dummy,$dst}, $src;\n\t" #
      "}}",
      []>;
}
2589
// Build a 64-bit register from two 32-bit registers with a vector mov.b64;
// $lo is the first element of the source pair and $hi the second. No
// selection pattern is attached.
let hasSideEffects = false in
def PACK_TWO_INT32 : NVPTXInst<
    (outs Int64Regs:$dst),
    (ins Int32Regs:$lo, Int32Regs:$hi),
    "mov.b64 \t$dst, {{$lo, $hi}};",
    []>;
2595
// int_nvvm_swap_lo_hi_b64: split $src into halves and repack them with the
// high half in the $lo slot and the low half in the $hi slot.
def : Pat<
    (int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
    (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                    (GET_LO_INT64 Int64Regs:$src))>;
2599
// Funnel shift; requires >= sm_32. An out-of-range amt does not trap, so the
// instructions are marked as having no side effects. All four take
// ($lo, $hi, $amt), are gated on hasHWROT32, and have no selection pattern;
// they are only used from explicit Pat records.
let hasSideEffects = false in {
  // Left funnel shift, immediate amount.
  def SHF_L_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // Left funnel shift, register amount.
  def SHF_L_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // Right funnel shift, immediate amount.
  def SHF_R_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;

  // Right funnel shift, register amount.
  def SHF_R_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",
      []>,
    Requires<[hasHWROT32]>;
}
2627
// HW version of rotate 64.
// int_nvvm_rotate_b64 is assembled from two left funnel shifts over the
// halves of $src. The first PACK_TWO_INT32 operand (its $lo slot) shifts with
// (hi, lo) of $src as the ($lo, $hi) inputs; the second (its $hi slot) uses
// (lo, hi).

// Immediate amount.
def : Pat<
    (int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
    (PACK_TWO_INT32
      (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src),
                          imm:$amt),
      (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                          (GET_HI_INT64 Int64Regs:$src),
                          imm:$amt))>,
  Requires<[hasHWROT32]>;

// Register amount.
def : Pat<
    (int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
    (PACK_TWO_INT32
      (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src),
                          Int32Regs:$amt),
      (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                          (GET_HI_INT64 Int64Regs:$src),
                          Int32Regs:$amt))>,
  Requires<[hasHWROT32]>;
2644
2645
// int_nvvm_rotate_right_b64 on targets where hasHWROT32 holds: two right
// funnel shifts over the halves of $src. The first PACK_TWO_INT32 operand
// (its $lo slot) shifts with (lo, hi) of $src as the ($lo, $hi) inputs; the
// second (its $hi slot) uses (hi, lo).

// Immediate amount.
def : Pat<
    (int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
    (PACK_TWO_INT32
      (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                          (GET_HI_INT64 Int64Regs:$src),
                          imm:$amt),
      (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src),
                          imm:$amt))>,
  Requires<[hasHWROT32]>;

// Register amount.
def : Pat<
    (int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
    (PACK_TWO_INT32
      (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                          (GET_HI_INT64 Int64Regs:$src),
                          Int32Regs:$amt),
      (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src),
                          Int32Regs:$amt))>,
  Requires<[hasHWROT32]>;
2661
// SW version of rotate 64, used when noHWROT32 holds.

// Rotate left, immediate: ROT64imm_sw receives $amt, then SUB_FRM_64 of $amt.
def : Pat<
    (int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
    (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
  Requires<[noHWROT32]>;

// Rotate left, register amount.
def : Pat<
    (int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
    (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
  Requires<[noHWROT32]>;

// Rotate right, immediate: same instruction as rotate left, with the two
// amount operands in the opposite order.
def : Pat<
    (int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
    (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
  Requires<[noHWROT32]>;

// Rotate right, register amount.
def : Pat<
    (int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
    (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
  Requires<[noHWROT32]>;
2675
2676
2677//-----------------------------------
2678// Texture Intrinsics
2679//-----------------------------------
2680
// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
// also be defined in NVPTXReplaceImageHandles.cpp
2683
2684// texmode_independent
2685let IsTex = true, IsTexModeUnified = false in {
2686// Texture fetch instructions using handles
2687
// Base instruction for a 1D texture fetch.
//   inst     - PTX mnemonic prepended to the operand string.
//   outtype  - register class of the four results $r, $g, $b, $a.
//   intype   - register class of the coordinate $x.
//   texsamp  - `ins` dag supplying the $t and $s operands printed inside the
//              brackets (texture and sampler handles, going by the name).
// No selection pattern is attached.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x\\}];",
                []>;
2695
// Expands TEX_1D_base into four variants. The two suffix letters describe $t
// and then $s: R = Int64Regs register operand, I = i64imm immediate operand.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2706
// 1D fetch instantiations. The <result>_<coordinate> suffix of each name
// mirrors the type suffixes of its PTX mnemonic.
defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2713
// Base instruction for a 1D texture fetch with an explicit $lod operand.
// Results are four `outtype` registers; $x and $lod are both `intype`.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x\\}], $lod;",
                []>;
2721
// Expands TEX_1D_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}
2733
// 1D fetch-with-LOD instantiations; all take Float32Regs coordinates.
defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2740
// Base instruction for a 1D texture fetch with gradient operands. Results are
// four `outtype` registers; $x, $gradx and $grady are all `intype`, and each
// gradient is printed as a one-element brace group. `texsamp` supplies the $t
// and $s operands. No selection pattern.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x\\}], "
                       "\\{$gradx\\}, \\{$grady\\};",
                []>;
2749
// Expands TEX_1D_GRAD_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}
2761
// 1D fetch-with-gradient instantiations; all take Float32Regs coordinates.
defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2768
// Base instruction for a 1D array texture fetch. In addition to the `intype`
// coordinate $x it takes an Int32Regs operand $l, printed ahead of $x in the
// coordinate group. `texsamp` supplies the $t and $s operands. No selection
// pattern.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x\\}];",
                []>;
2776
// Expands TEX_1D_ARRAY_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}
2788
// 1D array fetch instantiations. The <result>_<coordinate> suffix of each
// name mirrors the type suffixes of its PTX mnemonic.
defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2801
// Base instruction for a 1D array texture fetch with an explicit $lod
// operand. Takes an Int32Regs operand $l plus `intype` operands $x and $lod.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x\\}], $lod;",
                []>;
2810
// Expands TEX_1D_ARRAY_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}
2822
// 1D array fetch-with-LOD instantiations; all take Float32Regs coordinates.
defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2829
// Base instruction for a 1D array texture fetch with gradient operands.
// Takes an Int32Regs operand $l plus `intype` operands $x, $gradx and $grady;
// each gradient is printed as a one-element brace group. `texsamp` supplies
// the $t and $s operands. No selection pattern.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x,
                          intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x\\}], "
                       "\\{$gradx\\}, \\{$grady\\};",
                []>;
2839
// Expands TEX_1D_ARRAY_GRAD_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}
2851
// 1D array fetch-with-gradient instantiations; all take Float32Regs
// coordinates.
defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2858
// Base instruction for a 2D texture fetch. Results are four `outtype`
// registers; the coordinates $x and $y are `intype`. `texsamp` supplies the
// $t and $s operands. No selection pattern.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y\\}];",
                []>;
2866
// Expands TEX_2D_base into four variants. The two suffix letters describe $t
// and then $s: R = Int64Regs register operand, I = i64imm immediate operand.
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2874
// 2D fetch instantiations. The <result>_<coordinate> suffix of each name
// mirrors the type suffixes of its PTX mnemonic.
defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2881
// Base instruction for a 2D texture fetch with an explicit $lod operand.
// $x, $y and $lod are all `intype`. `texsamp` supplies the $t and $s
// operands. No selection pattern.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y\\}], $lod;",
                []>;
2890
// Expands TEX_2D_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}
2902
// 2D fetch-with-LOD instantiations; all take Float32Regs coordinates.
defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2909
// Base instruction for a 2D texture fetch with gradient operands. $x and $y
// are followed by two two-element gradient groups, ($gradx0, $gradx1) and
// ($grady0, $grady1); every one of these operands is `intype`. `texsamp`
// supplies the $t and $s operands. No selection pattern.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y\\}], "
                       "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;
2920
// Expands TEX_2D_GRAD_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}
2932
// 2D fetch-with-gradient instantiations; all take Float32Regs coordinates.
defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2939
// Base instruction for a 2D array texture fetch. Takes an Int32Regs operand
// $l plus `intype` coordinates $x and $y. The coordinate group is printed
// with four entries, $y appearing twice.
// NOTE(review): the repeated $y looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x, $y, $y\\}];",
                []>;
2948
// Expands TEX_2D_ARRAY_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}
2960
// 2D array fetch instantiations. The <result>_<coordinate> suffix of each
// name mirrors the type suffixes of its PTX mnemonic.
defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2973
// Base instruction for a 2D array texture fetch with an explicit $lod
// operand. Takes an Int32Regs operand $l plus `intype` operands $x, $y and
// $lod. The coordinate group is printed with four entries, $y appearing
// twice.
// NOTE(review): the repeated $y looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
                []>;
2983
// Expands TEX_2D_ARRAY_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}
2995
// 2D array fetch-with-LOD instantiations; all take Float32Regs coordinates.
defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3002
// Base instruction for a 2D array texture fetch with gradient operands.
// Takes an Int32Regs operand $l, `intype` coordinates $x and $y, and two
// two-element `intype` gradient groups. The coordinate group is printed with
// four entries, $y appearing twice.
// NOTE(review): the repeated $y looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins Int32Regs:$l, intype:$x, intype:$y,
                          intype:$gradx0, intype:$gradx1,
                          intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$l, $x, $y, $y\\}], "
                       "\\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;
3014
// Expands TEX_2D_ARRAY_GRAD_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}
3026
// 2D array fetch-with-gradient instantiations; all take Float32Regs
// coordinates.
defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3033
// Base instruction for a 3D texture fetch. Coordinates $x, $y and $z are
// `intype`. The coordinate group is printed with four entries, $z appearing
// twice.
// NOTE(review): the repeated $z looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y, $z, $z\\}];",
                []>;
3042
// Expands TEX_3D_base into four variants. The two suffix letters describe $t
// and then $s: R = Int64Regs register operand, I = i64imm immediate operand.
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3053
// 3D fetch instantiations. The <result>_<coordinate> suffix of each name
// mirrors the type suffixes of its PTX mnemonic.
defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3060
// Base instruction for a 3D texture fetch with an explicit $lod operand.
// $x, $y, $z and $lod are all `intype`. The coordinate group is printed with
// four entries, $z appearing twice.
// NOTE(review): the repeated $z looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                []>;
3070
// Expands TEX_3D_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_LEVEL_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}
3082
// 3D fetch-with-LOD instantiations; all take Float32Regs coordinates.
defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3089
// Base instruction for a 3D texture fetch with gradient operands. Takes
// `intype` coordinates $x, $y, $z and two three-component `intype` gradients
// ($gradx0..2, $grady0..2). The coordinate group and both gradient groups are
// printed with four entries, the last operand of each appearing twice.
// NOTE(review): the repeated entries look like padding to a four-element
// vector; confirm against the PTX `tex` operand rules before changing them.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z,
                          intype:$gradx0, intype:$gradx1, intype:$gradx2,
                          intype:$grady0, intype:$grady1, intype:$grady2)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y, $z, $z\\}], "
                       "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
                       "\\{$grady0, $grady1, $grady2, $grady2\\};",
                []>;
3103
// Expands TEX_3D_GRAD_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_3D_GRAD_base<inst, outtype, intype,
                       (ins i64imm:$t, i64imm:$s)>;
}
3115
// 3D fetch-with-gradient instantiations; all take Float32Regs coordinates.
defm TEX_3D_F32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3122
// Base instruction for a cube texture fetch. Coordinates $x, $y and $z are
// `intype`. The coordinate group is printed with four entries, $z appearing
// twice.
// NOTE(review): the repeated $z looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y, $z, $z\\}];",
                []>;
3131
// Expands TEX_CUBE_base into four variants. The two suffix letters describe
// $t and then $s: R = Int64Regs register operand, I = i64imm immediate
// operand.
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_CUBE_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_CUBE_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3142
// Cube fetch instantiations; all take Float32Regs coordinates.
defm TEX_CUBE_F32_F32 :
  TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 :
  TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 :
  TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3149
// Base instruction for a cube texture fetch with an explicit $lod operand.
// $x, $y, $z and $lod are all `intype`. The coordinate group is printed with
// four entries, $z appearing twice.
// NOTE(review): the repeated $z looks like padding to a four-element vector;
// confirm against the PTX `tex` operand rules before changing it.
// `texsamp` supplies the $t and $s operands. No selection pattern.
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, "
                       "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                []>;
3159
// Expands TEX_CUBE_LEVEL_base into four variants. The two suffix letters
// describe $t and then $s: R = Int64Regs register, I = i64imm immediate.
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR
    : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}
3171
// tex.level.cube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3172defm TEX_CUBE_F32_F32_LEVEL
3173  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3174defm TEX_CUBE_S32_F32_LEVEL
3175  : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3176defm TEX_CUBE_U32_F32_LEVEL
3177  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3178
// Base record for the non-unified tex.acube instructions. In addition to the
// three `intype` coordinates it takes an Int32Regs operand $l, which is
// printed as the first element of the coordinate vector (so no coordinate
// is duplicated here). `texsamp` must declare $t and $s.
3179class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3180                          NVPTXRegClass intype, dag texsamp>
3181    : NVPTXInst<(outs outtype:$r, outtype:$g,
3182                      outtype:$b, outtype:$a),
3183                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3184                                    intype:$z)),
3185                 inst # " \t\\{$r, $g, $b, $a\\},"
3186                        " [$t, $s, \\{$l, $x, $y, $z\\}];",
3187                 []>;
3188
// Expands one tex.acube mnemonic into the _RR/_RI/_IR/_II records.
// R = Int64Regs, I = i64imm; the first letter describes $t, the second $s.
3189multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3190                          NVPTXRegClass intype> {
3191  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3192                                (ins Int64Regs:$t, Int64Regs:$s)>;
3193  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3194                                (ins Int64Regs:$t, i64imm:$s)>;
3195  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3196                                (ins i64imm:$t, Int64Regs:$s)>;
3197  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3198                                (ins i64imm:$t, i64imm:$s)>;
3199}
3200
// tex.acube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3201defm TEX_CUBE_ARRAY_F32_F32
3202  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3203defm TEX_CUBE_ARRAY_S32_F32
3204  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3205defm TEX_CUBE_ARRAY_U32_F32
3206  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3207
// Base record for the non-unified tex.level.acube instructions: Int32Regs
// operand $l first in the coordinate vector, three `intype` coordinates, and
// a trailing `intype` operand $lod printed after the bracketed address.
// `texsamp` must declare $t and $s.
3208class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3209                                NVPTXRegClass intype, dag texsamp>
3210    : NVPTXInst<(outs outtype:$r, outtype:$g,
3211                      outtype:$b, outtype:$a),
3212                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3213                                    intype:$z, intype:$lod)),
3214                 inst # " \t\\{$r, $g, $b, $a\\},"
3215                        " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3216                 []>;
3217
// Expands one tex.level.acube mnemonic into the _RR/_RI/_IR/_II records.
// R = Int64Regs, I = i64imm; the first letter describes $t, the second $s.
3218multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3219                                NVPTXRegClass intype> {
3220  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3221                                      (ins Int64Regs:$t, Int64Regs:$s)>;
3222  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3223                                      (ins Int64Regs:$t, i64imm:$s)>;
3224  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3225                                      (ins i64imm:$t, Int64Regs:$s)>;
3226  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3227                                      (ins i64imm:$t, i64imm:$s)>;
3228}
3229
// tex.level.acube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3230defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3231  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3232                         Float32Regs, Float32Regs>;
3233defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3234  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3235                         Int32Regs, Float32Regs>;
3236defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3237  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3238                         Int32Regs, Float32Regs>;
3239
// Base record for the non-unified tld4 2D instructions. Produces four
// `outtype` results named $v0..$v3 (rather than $r/$g/$b/$a) from two
// `intype` coordinates. `texsamp` must declare $t and $s.
3240class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3241                   NVPTXRegClass intype, dag texsamp>
3242    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3243                      outtype:$v2, outtype:$v3),
3244                 !con(texsamp, (ins intype:$x, intype:$y)),
3245                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
3246                 []>;
3247
// Expands one tld4 2D mnemonic into the _RR/_RI/_IR/_II records.
// R = Int64Regs, I = i64imm; the first letter describes $t, the second $s.
3248multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3249  def _RR : TLD4_2D_base<inst, outtype, intype,
3250                         (ins Int64Regs:$t, Int64Regs:$s)>;
3251  def _RI : TLD4_2D_base<inst, outtype, intype,
3252                         (ins Int64Regs:$t, i64imm:$s)>;
3253  def _IR : TLD4_2D_base<inst, outtype, intype,
3254                         (ins i64imm:$t, Int64Regs:$s)>;
3255  def _II : TLD4_2D_base<inst, outtype, intype,
3256                         (ins i64imm:$t, i64imm:$s)>;
3257}
3258
// tld4 2D instantiations with f32 coordinates: one defm per component
// selector in the mnemonic (.r, .g, .b, .a), grouped by result type
// (f32, then s32, then u32). s32 and u32 results both use Int32Regs.
3259defm TLD4_R_2D_F32_F32
3260  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3261defm TLD4_G_2D_F32_F32
3262  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3263defm TLD4_B_2D_F32_F32
3264  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3265defm TLD4_A_2D_F32_F32
3266  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3267
3268defm TLD4_R_2D_S32_F32
3269  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3270defm TLD4_G_2D_S32_F32
3271  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3272defm TLD4_B_2D_S32_F32
3273  : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3274defm TLD4_A_2D_S32_F32
3275  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3276
3277defm TLD4_R_2D_U32_F32
3278  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3279defm TLD4_G_2D_U32_F32
3280  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3281defm TLD4_B_2D_U32_F32
3282  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3283defm TLD4_A_2D_U32_F32
3284  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3285
3286}
3287
3288
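3289// texmode_unified: every instruction in this block addresses the texture
// through a single handle operand ($t) and takes no separate sampler ($s).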
3290let IsTex = true, IsTexModeUnified = true in {
3291// Texture fetch instructions using handles
3292
// Base record for the unified-mode tex.1d instructions.
//   inst    - PTX mnemonic placed at the start of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the single coordinate $x.
//   tex     - leading input dag; it must declare the $t operand referenced by
//             the assembly string. No sampler operand is printed.
3293class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3294                          NVPTXRegClass intype, dag tex>
3295    : NVPTXInst<(outs outtype:$r, outtype:$g,
3296                      outtype:$b, outtype:$a),
3297                 !con(tex, (ins intype:$x)),
3298                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3299                 []>;
3300
// Expands one unified tex.1d mnemonic into two records: _R takes the texture
// handle $t in an Int64Regs register, _I takes it as an i64imm immediate.
3301multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3302                          NVPTXRegClass intype> {
3303  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3304  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
3305}
3306
// Unified tex.1d instantiations. Names follow <result type>_<coordinate type>:
// results are f32, s32 or u32 and coordinates are s32 or f32. Both s32 and
// u32 results use Int32Regs; only the mnemonic distinguishes them.
3307defm TEX_UNIFIED_1D_F32_S32
3308  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3309defm TEX_UNIFIED_1D_F32_F32
3310  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3311defm TEX_UNIFIED_1D_S32_S32
3312  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3313defm TEX_UNIFIED_1D_S32_F32
3314  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3315defm TEX_UNIFIED_1D_U32_S32
3316  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3317defm TEX_UNIFIED_1D_U32_F32
3318  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3319
// Base record for the unified tex.level.1d instructions: one `intype`
// coordinate $x plus a trailing `intype` operand $lod that is printed after
// the bracketed address. `tex` must declare $t.
3320class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3321                                NVPTXRegClass intype, dag tex>
3322    : NVPTXInst<(outs outtype:$r, outtype:$g,
3323                      outtype:$b, outtype:$a),
3324                 !con(tex, (ins intype:$x, intype:$lod)),
3325                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
3326                 []>;
3327
// Expands one unified tex.level.1d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3328multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3329                                NVPTXRegClass intype> {
3330  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3331  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3332}
3333
// Unified tex.level.1d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3334defm TEX_UNIFIED_1D_F32_F32_LEVEL
3335  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3336defm TEX_UNIFIED_1D_S32_F32_LEVEL
3337  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3338defm TEX_UNIFIED_1D_U32_F32_LEVEL
3339  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3340
// Base record for the unified tex.grad.1d instructions: one `intype`
// coordinate $x plus two `intype` operands, $gradx and $grady, each printed
// as its own one-element brace-enclosed vector after the address.
// `tex` must declare $t.
3341class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3342                               NVPTXRegClass intype, dag tex>
3343    : NVPTXInst<(outs outtype:$r, outtype:$g,
3344                      outtype:$b, outtype:$a),
3345                 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3346                 inst # " \t\\{$r, $g, $b, $a\\},"
3347                        " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3348                 []>;
3349
// Expands one unified tex.grad.1d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3350multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3351                               NVPTXRegClass intype> {
3352  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3353  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3354}
3355
// Unified tex.grad.1d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3356defm TEX_UNIFIED_1D_F32_F32_GRAD
3357  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3358defm TEX_UNIFIED_1D_S32_F32_GRAD
3359  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3360defm TEX_UNIFIED_1D_U32_F32_GRAD
3361  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3362
// Base record for the unified tex.a1d instructions: an Int32Regs operand $l
// printed first in the coordinate vector, followed by one `intype`
// coordinate $x. `tex` must declare $t.
3363class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3364                                NVPTXRegClass intype, dag tex>
3365    : NVPTXInst<(outs outtype:$r, outtype:$g,
3366                      outtype:$b, outtype:$a),
3367                 !con(tex, (ins Int32Regs:$l, intype:$x)),
3368                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
3369                 []>;
3370
// Expands one unified tex.a1d mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3371multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3372                                NVPTXRegClass intype> {
3373  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3374  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3375}
3376
// Unified tex.a1d instantiations. Names follow
// <result type>_<coordinate type>: f32/s32/u32 results, s32 or f32
// coordinates. s32 and u32 results both use Int32Regs.
3377defm TEX_UNIFIED_1D_ARRAY_F32_S32
3378  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3379defm TEX_UNIFIED_1D_ARRAY_F32_F32
3380  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3381defm TEX_UNIFIED_1D_ARRAY_S32_S32
3382  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3383defm TEX_UNIFIED_1D_ARRAY_S32_F32
3384  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3385defm TEX_UNIFIED_1D_ARRAY_U32_S32
3386  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3387defm TEX_UNIFIED_1D_ARRAY_U32_F32
3388  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3389
// Base record for the unified tex.level.a1d instructions: Int32Regs operand
// $l first in the coordinate vector, one `intype` coordinate $x, and a
// trailing `intype` operand $lod printed after the address.
// `tex` must declare $t.
3390class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3391                                      NVPTXRegClass intype, dag tex>
3392    : NVPTXInst<(outs outtype:$r, outtype:$g,
3393                      outtype:$b, outtype:$a),
3394                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3395                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
3396                 []>;
3397
// Expands one unified tex.level.a1d mnemonic into _R (handle $t in
// Int64Regs) and _I (handle $t as i64imm) records.
3398multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3399                                      NVPTXRegClass intype> {
3400  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3401                                           (ins Int64Regs:$t)>;
3402  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3403                                           (ins i64imm:$t)>;
3404}
3405
// Unified tex.level.a1d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3406defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3407  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3408                               Float32Regs, Float32Regs>;
3409defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3410  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3411                               Int32Regs, Float32Regs>;
3412defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3413  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3414                               Int32Regs, Float32Regs>;
3415
// Base record for the unified tex.grad.a1d instructions: Int32Regs operand
// $l first in the coordinate vector, one `intype` coordinate $x, then
// $gradx and $grady, each printed as a one-element brace-enclosed vector.
// `tex` must declare $t.
// NOTE(review): the second string fragment starts with two spaces, unlike
// the single space used by the non-array 1D grad form; this only affects
// whitespace in the printed assembly.
3416class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3417                                     NVPTXRegClass intype, dag tex>
3418    : NVPTXInst<(outs outtype:$r, outtype:$g,
3419                      outtype:$b, outtype:$a),
3420                 !con(tex, (ins Int32Regs:$l, intype:$x,
3421                                intype:$gradx, intype:$grady)),
3422                 inst # " \t\\{$r, $g, $b, $a\\},"
3423                        "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3424                 []>;
3425
// Expands one unified tex.grad.a1d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3426multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3427                                     NVPTXRegClass intype> {
3428  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3429                                          (ins Int64Regs:$t)>;
3430  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3431                                          (ins i64imm:$t)>;
3432}
3433
// Unified tex.grad.a1d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3434defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3435  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3436                              Float32Regs, Float32Regs>;
3437defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3438  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3439                              Int32Regs, Float32Regs>;
3440defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3441  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3442                              Int32Regs, Float32Regs>;
3443
// Base record for the unified tex.2d instructions: four `outtype` results
// ($r, $g, $b, $a) from two `intype` coordinates ($x, $y).
// `tex` must declare the $t operand referenced by the assembly string.
3444class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3445                          NVPTXRegClass intype, dag tex>
3446    : NVPTXInst<(outs outtype:$r, outtype:$g,
3447                      outtype:$b, outtype:$a),
3448                 !con(tex, (ins intype:$x, intype:$y)),
3449                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
3450                 []>;
3451
// Expands one unified tex.2d mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3452multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3453                          NVPTXRegClass intype> {
3454  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3455  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3456}
3457
// Unified tex.2d instantiations. Names follow <result type>_<coordinate type>:
// f32/s32/u32 results, s32 or f32 coordinates. s32 and u32 results both use
// Int32Regs.
3458defm TEX_UNIFIED_2D_F32_S32
3459  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3460defm TEX_UNIFIED_2D_F32_F32
3461  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3462defm TEX_UNIFIED_2D_S32_S32
3463  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3464defm TEX_UNIFIED_2D_S32_F32
3465  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3466defm TEX_UNIFIED_2D_U32_S32
3467  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3468defm TEX_UNIFIED_2D_U32_F32
3469  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3470
// Base record for the unified tex.level.2d instructions: two `intype`
// coordinates ($x, $y) plus a trailing `intype` operand $lod printed after
// the bracketed address. `tex` must declare $t.
3471class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3472                                NVPTXRegClass intype, dag tex>
3473    : NVPTXInst<(outs outtype:$r, outtype:$g,
3474                      outtype:$b, outtype:$a),
3475                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3476                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
3477                 []>;
3478
// Expands one unified tex.level.2d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3479multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3480                                NVPTXRegClass intype> {
3481  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3482  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3483}
3484
// Unified tex.level.2d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3485defm TEX_UNIFIED_2D_F32_F32_LEVEL
3486  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3487defm TEX_UNIFIED_2D_S32_F32_LEVEL
3488  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3489defm TEX_UNIFIED_2D_U32_F32_LEVEL
3490  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3491
// Base record for the unified tex.grad.2d instructions: two `intype`
// coordinates ($x, $y) followed by four `intype` gradient operands, printed
// as two two-element vectors {$gradx0, $gradx1} and {$grady0, $grady1}.
// `tex` must declare $t.
3492class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3493                               NVPTXRegClass intype, dag tex>
3494    : NVPTXInst<(outs outtype:$r, outtype:$g,
3495                      outtype:$b, outtype:$a),
3496                 !con(tex, (ins intype:$x, intype:$y,
3497                                intype:$gradx0, intype:$gradx1,
3498                                intype:$grady0, intype:$grady1)),
3499                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3500                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3501                 []>;
// Expands one unified tex.grad.2d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3502multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3503                               NVPTXRegClass intype> {
3504  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3505  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3506}
3507
// Unified tex.grad.2d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3508defm TEX_UNIFIED_2D_F32_F32_GRAD
3509  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3510defm TEX_UNIFIED_2D_S32_F32_GRAD
3511  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3512defm TEX_UNIFIED_2D_U32_F32_GRAD
3513  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3514
// Base record for the unified tex.a2d instructions: Int32Regs operand $l
// printed first in the coordinate vector, then two `intype` coordinates
// ($x, $y). The assembly string prints $y twice so the vector has four
// elements. `tex` must declare $t.
3515class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3516                                NVPTXRegClass intype, dag tex>
3517    : NVPTXInst<(outs outtype:$r, outtype:$g,
3518                      outtype:$b, outtype:$a),
3519                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3520                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
3521                 []>;
// Expands one unified tex.a2d mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3522multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3523                                NVPTXRegClass intype> {
3524  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3525  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3526}
3527
// Unified tex.a2d instantiations. Names follow
// <result type>_<coordinate type>: f32/s32/u32 results, s32 or f32
// coordinates. s32 and u32 results both use Int32Regs.
3528defm TEX_UNIFIED_2D_ARRAY_F32_S32
3529  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3530defm TEX_UNIFIED_2D_ARRAY_F32_F32
3531  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3532defm TEX_UNIFIED_2D_ARRAY_S32_S32
3533  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3534defm TEX_UNIFIED_2D_ARRAY_S32_F32
3535  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3536defm TEX_UNIFIED_2D_ARRAY_U32_S32
3537  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3538defm TEX_UNIFIED_2D_ARRAY_U32_F32
3539  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3540
// Base record for the unified tex.level.a2d instructions: Int32Regs operand
// $l first in the coordinate vector, two `intype` coordinates ($x, $y; $y is
// printed twice to fill four elements), and a trailing `intype` operand $lod
// printed after the address. `tex` must declare $t.
// NOTE(review): the second string fragment starts with two spaces; this only
// affects whitespace in the printed assembly.
3541class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3542                                      NVPTXRegClass intype, dag tex>
3543    : NVPTXInst<(outs outtype:$r, outtype:$g,
3544                      outtype:$b, outtype:$a),
3545                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3546                                intype:$lod)),
3547                 inst # " \t\\{$r, $g, $b, $a\\},"
3548                        "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
3549                 []>;
// Expands one unified tex.level.a2d mnemonic into _R (handle $t in
// Int64Regs) and _I (handle $t as i64imm) records.
3550multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3551                                      NVPTXRegClass intype> {
3552  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3553                                           (ins Int64Regs:$t)>;
3554  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3555                                           (ins i64imm:$t)>;
3556}
3557
// Unified tex.level.a2d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3558defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3559  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3560                               Float32Regs, Float32Regs>;
3561defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3562  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3563                               Int32Regs, Float32Regs>;
3564defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3565  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3566                               Int32Regs, Float32Regs>;
3567
// Base record for the unified tex.grad.a2d instructions: Int32Regs operand
// $l first in the coordinate vector, two `intype` coordinates ($x, $y; $y is
// printed twice to fill four elements), then four `intype` gradient operands
// printed as {$gradx0, $gradx1} and {$grady0, $grady1}.
// `tex` must declare $t.
3568class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3569                                     NVPTXRegClass intype, dag tex>
3570    : NVPTXInst<(outs outtype:$r, outtype:$g,
3571                      outtype:$b, outtype:$a),
3572                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3573                                intype:$gradx0, intype:$gradx1,
3574                                intype:$grady0, intype:$grady1)),
3575                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3576                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3577                 []>;
// Expands one unified tex.grad.a2d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3578multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3579                                     NVPTXRegClass intype> {
3580  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3581                                          (ins Int64Regs:$t)>;
3582  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3583                                          (ins i64imm:$t)>;
3584}
3585
// Unified tex.grad.a2d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3586defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3587  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3588                              Float32Regs, Float32Regs>;
3589defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3590  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3591                              Int32Regs, Float32Regs>;
3592defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3593  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3594                              Int32Regs, Float32Regs>;
3595
// Base record for the unified tex.3d instructions: four `outtype` results
// from three `intype` coordinates ($x, $y, $z). The assembly string prints
// $z twice so the coordinate vector has four elements.
// `tex` must declare $t.
3596class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3597                          NVPTXRegClass intype, dag tex>
3598    : NVPTXInst<(outs outtype:$r, outtype:$g,
3599                      outtype:$b, outtype:$a),
3600                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3601                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3602                 []>;
// Expands one unified tex.3d mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3603multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3604                          NVPTXRegClass intype> {
3605  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3606  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
3607}
3608
// Unified tex.3d instantiations. Names follow <result type>_<coordinate type>:
// f32/s32/u32 results, s32 or f32 coordinates. s32 and u32 results both use
// Int32Regs.
3609defm TEX_UNIFIED_3D_F32_S32
3610  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3611defm TEX_UNIFIED_3D_F32_F32
3612  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3613defm TEX_UNIFIED_3D_S32_S32
3614  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3615defm TEX_UNIFIED_3D_S32_F32
3616  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3617defm TEX_UNIFIED_3D_U32_S32
3618  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3619defm TEX_UNIFIED_3D_U32_F32
3620  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3621
// Base record for the unified tex.level.3d instructions: three `intype`
// coordinates ($x, $y, $z; $z is printed twice to fill four elements) plus a
// trailing `intype` operand $lod printed after the address.
// `tex` must declare $t.
3622class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3623                                NVPTXRegClass intype, dag tex>
3624    : NVPTXInst<(outs outtype:$r, outtype:$g,
3625                      outtype:$b, outtype:$a),
3626                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3627                 inst # " \t\\{$r, $g, $b, $a\\},"
3628                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3629                 []>;
// Expands one unified tex.level.3d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3630multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3631                                NVPTXRegClass intype> {
3632  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3633  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3634}
3635
// Unified tex.level.3d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3636defm TEX_UNIFIED_3D_F32_F32_LEVEL
3637  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3638defm TEX_UNIFIED_3D_S32_F32_LEVEL
3639  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3640defm TEX_UNIFIED_3D_U32_F32_LEVEL
3641  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3642
// Base record for the unified tex.grad.3d instructions: three `intype`
// coordinates ($x, $y, $z) followed by six `intype` gradient operands
// ($gradx0..2, $grady0..2). Each of the three printed vectors repeats its
// third element ($z, $gradx2, $grady2) so that it has four elements.
// `tex` must declare $t.
3643class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3644                               NVPTXRegClass intype, dag tex>
3645    : NVPTXInst<(outs outtype:$r, outtype:$g,
3646                      outtype:$b, outtype:$a),
3647                 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3648                                intype:$gradx0, intype:$gradx1,
3649                                intype:$gradx2, intype:$grady0,
3650                                intype:$grady1, intype:$grady2)),
3651                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3652                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3653                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
3654                 []>;
// Expands one unified tex.grad.3d mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3655multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3656                               NVPTXRegClass intype> {
3657  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3658  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3659}
3660
// Unified tex.grad.3d with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3661defm TEX_UNIFIED_3D_F32_F32_GRAD
3662  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3663defm TEX_UNIFIED_3D_S32_F32_GRAD
3664  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3665defm TEX_UNIFIED_3D_U32_F32_GRAD
3666  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3667
// Base record for the unified tex.cube instructions: four `outtype` results
// from three `intype` coordinates ($x, $y, $z). The assembly string prints
// $z twice so the coordinate vector has four elements.
// `tex` must declare $t.
3668class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3669                            NVPTXRegClass intype, dag tex>
3670    : NVPTXInst<(outs outtype:$r, outtype:$g,
3671                      outtype:$b, outtype:$a),
3672                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3673                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3674                 []>;
// Expands one unified tex.cube mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3675multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3676                            NVPTXRegClass intype> {
3677  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3678  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
3679}
3680
// Unified tex.cube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3681defm TEX_UNIFIED_CUBE_F32_F32
3682  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3683defm TEX_UNIFIED_CUBE_S32_F32
3684  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3685defm TEX_UNIFIED_CUBE_U32_F32
3686  : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3687
// Base record for the unified tex.level.cube instructions: three `intype`
// coordinates ($x, $y, $z; $z is printed twice to fill four elements) plus a
// trailing `intype` operand $lod printed after the address.
// `tex` must declare $t.
3688class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3689                                  NVPTXRegClass intype, dag tex>
3690    : NVPTXInst<(outs outtype:$r, outtype:$g,
3691                      outtype:$b, outtype:$a),
3692                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3693                 inst # " \t\\{$r, $g, $b, $a\\},"
3694                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3695                 []>;
// Expands one unified tex.level.cube mnemonic into _R (handle $t in
// Int64Regs) and _I (handle $t as i64imm) records.
3696multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3697                                  NVPTXRegClass intype> {
3698  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3699                                       (ins Int64Regs:$t)>;
3700  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3701                                       (ins i64imm:$t)>;
3702}
3703
// Unified tex.level.cube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3704defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3705  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3706                           Float32Regs, Float32Regs>;
3707defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3708  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3709                           Int32Regs, Float32Regs>;
3710defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3711  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3712                           Int32Regs, Float32Regs>;
3713
// Base record for the unified tex.acube instructions: Int32Regs operand $l
// printed first in the coordinate vector, followed by three `intype`
// coordinates ($x, $y, $z); no coordinate is duplicated here.
// `tex` must declare $t.
3714class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3715                                  NVPTXRegClass intype, dag tex>
3716    : NVPTXInst<(outs outtype:$r, outtype:$g,
3717                      outtype:$b, outtype:$a),
3718                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3719                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
3720                 []>;
// Expands one unified tex.acube mnemonic into _R (handle $t in Int64Regs)
// and _I (handle $t as i64imm) records.
3721multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3722                                  NVPTXRegClass intype> {
3723  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3724                                       (ins Int64Regs:$t)>;
3725  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3726                                       (ins i64imm:$t)>;
3727}
3728
// Unified tex.acube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3729defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3730  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3731defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3732  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3733defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3734  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3735
// Base record for the unified tex.level.acube instructions: Int32Regs
// operand $l first in the coordinate vector, three `intype` coordinates
// ($x, $y, $z), and a trailing `intype` operand $lod printed after the
// address. `tex` must declare $t.
3736class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3737                                        NVPTXRegClass intype, dag tex>
3738    : NVPTXInst<(outs outtype:$r, outtype:$g,
3739                      outtype:$b, outtype:$a),
3740                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3741                                intype:$lod)),
3742                 inst # " \t\\{$r, $g, $b, $a\\},"
3743                        " [$t, \\{$l, $x, $y, $z\\}], $lod;",
3744                 []>;
// Expands one unified tex.level.acube mnemonic into _R (handle $t in
// Int64Regs) and _I (handle $t as i64imm) records.
3745multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3746                                        NVPTXRegClass intype> {
3747  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3748                                             (ins Int64Regs:$t)>;
3749  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3750                                             (ins i64imm:$t)>;
3751}
3752
// Unified tex.level.acube with f32 coordinates and f32, s32 or u32 results
// (s32 and u32 both use Int32Regs).
3753defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3754  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3755                                 Float32Regs, Float32Regs>;
3756defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3757  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3758                                 Int32Regs, Float32Regs>;
3759defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3760  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3761                                 Int32Regs, Float32Regs>;
3762
// Base record for the unified tld4 2D instructions: four `outtype` results
// named $v0..$v3 from two `intype` coordinates ($x, $y).
// `tex` must declare the $t operand; no sampler operand is printed.
3763class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3764                           NVPTXRegClass intype, dag tex>
3765    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3766                      outtype:$v2, outtype:$v3),
3767                 !con(tex, (ins intype:$x, intype:$y)),
3768                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
3769                 []>;
// Expands one unified tld4 2D mnemonic into _R (handle $t in Int64Regs) and
// _I (handle $t as i64imm) records.
3770multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3771                           NVPTXRegClass intype> {
3772  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3773  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3774}
3775
// Unified tld4 2D instantiations with f32 coordinates: one defm per component
// selector in the mnemonic (.r, .g, .b, .a), grouped by result type
// (f32, then s32, then u32). s32 and u32 results both use Int32Regs.
3776defm TLD4_UNIFIED_R_2D_F32_F32
3777  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3778defm TLD4_UNIFIED_G_2D_F32_F32
3779  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3780defm TLD4_UNIFIED_B_2D_F32_F32
3781  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3782defm TLD4_UNIFIED_A_2D_F32_F32
3783  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3784
3785defm TLD4_UNIFIED_R_2D_S32_F32
3786  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3787defm TLD4_UNIFIED_G_2D_S32_F32
3788  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3789defm TLD4_UNIFIED_B_2D_S32_F32
3790  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3791defm TLD4_UNIFIED_A_2D_S32_F32
3792  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3793
3794defm TLD4_UNIFIED_R_2D_U32_F32
3795  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3796defm TLD4_UNIFIED_G_2D_U32_F32
3797  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3798defm TLD4_UNIFIED_B_2D_U32_F32
3799  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3800defm TLD4_UNIFIED_A_2D_U32_F32
3801  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3802
3803}
3804
3805
3806
3807//=== Surface load instructions
3808
3809let IsSuld = true in {
3810
// Single-result 1D surface load record. The result is $r (class `outtype`);
// `surf` supplies the handle operand (the asm string refers to it as $s) and
// the coordinate $x is an Int32Regs operand appended after it.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. Note that the b8 forms use Int16Regs for the result.
defm SULD_1D_I8_CLAMP
  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP
  : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP
  : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP
  : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_I8_TRAP
  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP
  : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP
  : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP
  : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_I8_ZERO
  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO
  : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO
  : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO
  : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
3835
// Single-result 1D-array surface load record. Operands after the handle from
// `surf` (printed as $s) are $l followed by $x, both Int32Regs; they are
// printed in that order inside the coordinate braces.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the result.
defm SULD_1D_ARRAY_I8_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_I8_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<
    "suld.b.a1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_I8_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<
    "suld.b.a1d.b64.zero", Int64Regs>;
3872
// Single-result 2D surface load record. The handle from `surf` is printed as
// $s; the coordinates $x and $y are Int32Regs operands appended after it.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the result.
defm SULD_2D_I8_CLAMP
  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP
  : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP
  : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP
  : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_I8_TRAP
  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP
  : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP
  : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP
  : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_I8_ZERO
  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO
  : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO
  : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO
  : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
3897
// Single-result 2D-array surface load record. Operands after the handle from
// `surf` (printed as $s) are $l, $x and $y, all Int32Regs.
// The asm string prints $y twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for a2d - confirm against the PTX ISA before changing it.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the result.
defm SULD_2D_ARRAY_I8_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP
  : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_I8_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP
  : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_I8_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO
  : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
3922
// Single-result 3D surface load record. Operands after the handle from `surf`
// (printed as $s) are $x, $y and $z, all Int32Regs.
// The asm string prints $z twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for 3d - confirm against the PTX ISA before changing it.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the result.
defm SULD_3D_I8_CLAMP
  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP
  : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP
  : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP
  : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_I8_TRAP
  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP
  : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP
  : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP
  : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_I8_ZERO
  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO
  : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO
  : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO
  : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
3947}
3948
3949let IsSuld = 2 in {
3950
// Two-result 1D surface load record. Results are $r and $g (class `outtype`);
// `surf` supplies the handle (printed as $s) and $x is the Int32Regs
// coordinate appended after it.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the results.
defm SULD_1D_V2I8_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP
  : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_V2I8_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP
  : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_V2I8_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO
  : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
3975
// Two-result 1D-array surface load record. Results are $r and $g; operands
// after the handle from `surf` (printed as $s) are $l then $x, both Int32Regs.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the results.
defm SULD_1D_ARRAY_V2I8_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_V2I8_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_V2I8_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO : SULD_1D_ARRAY_V2<
    "suld.b.a1d.v2.b64.zero", Int64Regs>;
4012
// Two-result 2D surface load record. Results are $r and $g; the handle from
// `surf` is printed as $s and is followed by the Int32Regs coordinates $x, $y.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the results.
defm SULD_2D_V2I8_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<
    "suld.b.2d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_V2I8_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP : SULD_2D_V2<
    "suld.b.2d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_V2I8_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO : SULD_2D_V2<
    "suld.b.2d.v2.b64.zero", Int64Regs>;
4049
// Two-result 2D-array surface load record. Results are $r and $g; operands
// after the handle from `surf` (printed as $s) are $l, $x and $y (Int32Regs).
// The asm string prints $y twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for a2d - confirm against the PTX ISA before changing it.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst,
                           " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the results.
defm SULD_2D_ARRAY_V2I8_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_V2I8_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_V2I8_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO : SULD_2D_ARRAY_V2<
    "suld.b.a2d.v2.b64.zero", Int64Regs>;
4086
// Two-result 3D surface load record. Results are $r and $g; operands after
// the handle from `surf` (printed as $s) are $x, $y and $z (Int32Regs).
// The asm string prints $z twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for 3d - confirm against the PTX ISA before changing it.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(inst,
                           " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants. The b8 forms use Int16Regs for the results.
defm SULD_3D_V2I8_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP
  : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_V2I8_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP
  : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_V2I8_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO
  : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
4111
4112}
4113
4114let IsSuld = 3 in {
4115
// Four-result 1D surface load record. Results are $r, $g, $b and $a (class
// `outtype`); `surf` supplies the handle (printed as $s) and $x is the
// Int32Regs coordinate appended after it.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (b8/b16/b32 only). The b8 forms use Int16Regs.
defm SULD_1D_V4I8_CLAMP
  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP
  : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP
  : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_1D_V4I8_TRAP
  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP
  : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP
  : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_1D_V4I8_ZERO
  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO
  : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO
  : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
4137
// Four-result 1D-array surface load record. Results are $r, $g, $b and $a;
// operands after the handle from `surf` (printed as $s) are $l then $x, both
// Int32Regs.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst,
                           " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (b8/b16/b32 only). The b8 forms use Int16Regs.
defm SULD_1D_ARRAY_V4I8_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_1D_ARRAY_V4I8_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_1D_ARRAY_V4I8_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO : SULD_1D_ARRAY_V4<
    "suld.b.a1d.v4.b32.zero", Int32Regs>;
4168
// Four-result 2D surface load record. Results are $r, $g, $b and $a; the
// handle from `surf` is printed as $s and is followed by the Int32Regs
// coordinates $x and $y.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst,
                           " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (b8/b16/b32 only). The b8 forms use Int16Regs.
defm SULD_2D_V4I8_CLAMP
  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP
  : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP
  : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_2D_V4I8_TRAP
  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP
  : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP
  : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_2D_V4I8_ZERO
  : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO
  : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO
  : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
4190
// Four-result 2D-array surface load record. Results are $r, $g, $b and $a;
// operands after the handle from `surf` (printed as $s) are $l, $x and $y
// (Int32Regs).
// The asm string prints $y twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for a2d - confirm against the PTX ISA before changing it.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(
                    inst,
                    " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (b8/b16/b32 only). The b8 forms use Int16Regs.
defm SULD_2D_ARRAY_V4I8_CLAMP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_2D_ARRAY_V4I8_TRAP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_2D_ARRAY_V4I8_ZERO : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO : SULD_2D_ARRAY_V4<
    "suld.b.a2d.v4.b32.zero", Int32Regs>;
4221
// Four-result 3D surface load record. Results are $r, $g, $b and $a; operands
// after the handle from `surf` (printed as $s) are $x, $y and $z (Int32Regs).
// The asm string prints $z twice, giving a four-entry coordinate list.
// NOTE(review): presumably this pads the tuple to the four elements PTX wants
// for 3d - confirm against the PTX ISA before changing it.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(
                    inst,
                    " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R
    : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
    : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (b8/b16/b32 only). The b8 forms use Int16Regs.
defm SULD_3D_V4I8_CLAMP
  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP
  : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP
  : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

// .trap variants.
defm SULD_3D_V4I8_TRAP
  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP
  : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP
  : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

// .zero variants.
defm SULD_3D_V4I8_ZERO
  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO
  : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO
  : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4243
4244}
4245
4246//-----------------------------------
4247// Texture Query Intrinsics
4248//-----------------------------------
4249
let IsSurfTexQuery = true in {

// One texture-query instruction record: a single Int32Regs result $d and the
// handle operand supplied through `handle`. The asm string refers to the
// handle as $a, so `handle` has to name it. No selection pattern is attached
// here; the register-handle forms are matched by separate Pat records.
class TXQ_base<string inst, dag handle>
    : NVPTXInst<(outs Int32Regs:$d), handle,
                inst # " \t$d, [$a];",
                []>;

// Emits both handle flavours from one mnemonic so the _R/_I pair can never
// drift apart (the same base-class + multiclass idiom the SULD/SUST families
// in this file use):
//   NAME_R - handle $a as an Int64Regs operand
//   NAME_I - handle $a as an i64imm operand
multiclass TXQ<string inst> {
  def _R : TXQ_base<inst, (ins Int64Regs:$a)>;
  def _I : TXQ_base<inst, (ins i64imm:$a)>;
}

// Record names (TXQ_*_R / TXQ_*_I) and asm strings are unchanged.
defm TXQ_CHANNEL_ORDER : TXQ<"txq.channel_order.b32">;
defm TXQ_CHANNEL_DATA_TYPE : TXQ<"txq.channel_data_type.b32">;
defm TXQ_WIDTH : TXQ<"txq.width.b32">;
defm TXQ_HEIGHT : TXQ<"txq.height.b32">;
defm TXQ_DEPTH : TXQ<"txq.depth.b32">;
defm TXQ_ARRAY_SIZE : TXQ<"txq.array_size.b32">;
defm TXQ_NUM_SAMPLES : TXQ<"txq.num_samples.b32">;
defm TXQ_NUM_MIPMAP_LEVELS : TXQ<"txq.num_mipmap_levels.b32">;
}
4316
// Selection patterns: each int_nvvm_txq_* intrinsic applied to an Int64Regs
// handle is selected to the matching TXQ_*_R instruction. There are no
// patterns here for the _I forms.
def : Pat<
  (int_nvvm_txq_channel_order Int64Regs:$a),
  (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_channel_data_type Int64Regs:$a),
  (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_width Int64Regs:$a),
  (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_height Int64Regs:$a),
  (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_depth Int64Regs:$a),
  (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_array_size Int64Regs:$a),
  (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_samples Int64Regs:$a),
  (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
  (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4333
4334
4335//-----------------------------------
4336// Surface Query Intrinsics
4337//-----------------------------------
4338
let IsSurfTexQuery = true in {

// One surface-query instruction record: a single Int32Regs result $d and the
// handle operand supplied through `handle`. The asm string refers to the
// handle as $a, so `handle` has to name it. No selection pattern is attached
// here; the register-handle forms are matched by separate Pat records.
class SUQ_base<string inst, dag handle>
    : NVPTXInst<(outs Int32Regs:$d), handle,
                inst # " \t$d, [$a];",
                []>;

// Emits both handle flavours from one mnemonic so the _R/_I pair can never
// drift apart (the same base-class + multiclass idiom the SULD/SUST families
// in this file use):
//   NAME_R - handle $a as an Int64Regs operand
//   NAME_I - handle $a as an i64imm operand
multiclass SUQ<string inst> {
  def _R : SUQ_base<inst, (ins Int64Regs:$a)>;
  def _I : SUQ_base<inst, (ins i64imm:$a)>;
}

// Record names (SUQ_*_R / SUQ_*_I) and asm strings are unchanged.
defm SUQ_CHANNEL_ORDER : SUQ<"suq.channel_order.b32">;
defm SUQ_CHANNEL_DATA_TYPE : SUQ<"suq.channel_data_type.b32">;
defm SUQ_WIDTH : SUQ<"suq.width.b32">;
defm SUQ_HEIGHT : SUQ<"suq.height.b32">;
defm SUQ_DEPTH : SUQ<"suq.depth.b32">;
defm SUQ_ARRAY_SIZE : SUQ<"suq.array_size.b32">;
}
4389
// Selection patterns: each int_nvvm_suq_* intrinsic applied to an Int64Regs
// handle is selected to the matching SUQ_*_R instruction. There are no
// patterns here for the _I forms.
def : Pat<
  (int_nvvm_suq_channel_order Int64Regs:$a),
  (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_channel_data_type Int64Regs:$a),
  (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_width Int64Regs:$a),
  (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_height Int64Regs:$a),
  (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_depth Int64Regs:$a),
  (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_array_size Int64Regs:$a),
  (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4402
4403
4404//===- Handle Query -------------------------------------------------------===//
4405
4406// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Handle-type predicates. Each takes an Int64Regs operand $a, produces an
// Int1Regs result $d, and carries its own selection pattern for the
// corresponding int_nvvm_istypep_* intrinsic.
def ISTYPEP_SAMPLER : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.samplerref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.surfref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "istypep.texref \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4419
4420//===- Surface Stores -----------------------------------------------------===//
4421
4422let IsSust = true in {
4423
// Single-value 1D surface store record. There are no results; `surf`
// supplies the handle (printed as $s), followed by the Int32Regs coordinate
// $x and the value operand $r of class `intype`.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r\\};"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R
    : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I
    : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b .clamp variants. The b8 forms take the value in Int16Regs.
defm SUST_B_1D_B8_CLAMP
  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_B16_CLAMP
  : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP
  : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP
  : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

// sust.b .trap variants.
defm SUST_B_1D_B8_TRAP
  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
defm SUST_B_1D_B16_TRAP
  : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP
  : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP
  : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

// sust.b .zero variants.
defm SUST_B_1D_B8_ZERO
  : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
defm SUST_B_1D_B16_ZERO
  : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
defm SUST_B_1D_B32_ZERO
  : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
defm SUST_B_1D_B64_ZERO
  : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

// sust.p variants: only .trap, and only b8/b16/b32, are instantiated.
defm SUST_P_1D_B8_TRAP
  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
defm SUST_P_1D_B16_TRAP
  : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP
  : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4452
// Two-value 1D surface store record. There are no results; `surf` supplies
// the handle (printed as $s), followed by the Int32Regs coordinate $x and the
// value operands $r and $g of class `intype`.
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
                []>;

// Emits NAME_R (handle $s as Int64Regs) and NAME_I (handle $s as i64imm).
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  def _R
    : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I
    : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b .clamp variants. The b8 forms take the values in Int16Regs.
defm SUST_B_1D_V2B8_CLAMP
  : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_V2B16_CLAMP
  : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_V2B32_CLAMP
  : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_V2B64_CLAMP
  : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

// sust.b .trap variants.
defm SUST_B_1D_V2B8_TRAP
  : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_V2B16_TRAP
  : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_V2B32_TRAP
  : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_V2B64_TRAP
  : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

// sust.b .zero variants.
defm SUST_B_1D_V2B8_ZERO
  : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_V2B16_ZERO
  : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_V2B32_ZERO
  : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_V2B64_ZERO
  : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

// sust.p variants: only .trap, and only b8/b16/b32, are instantiated.
defm SUST_P_1D_V2B8_TRAP
  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP
  : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP
  : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4481
// Instruction shape for a four-component (v4) 1D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r, $g, $b and $a.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The instruction has no outputs and an empty pattern list.
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Produces the two surface-operand forms of SUST_1D_V4_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4492
// Four-component 1D surface stores, all instantiated from SUST_1D_V4.
// Only b8/b16/b32 element sizes are defined; b8 and b16 share Int16Regs.

// sust.b.1d.v4, .clamp
defm SUST_B_1D_V4B8_CLAMP  : SUST_1D_V4<"sust.b.1d.v4.b8.clamp",  Int16Regs>;
defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;

// sust.b.1d.v4, .trap
defm SUST_B_1D_V4B8_TRAP  : SUST_1D_V4<"sust.b.1d.v4.b8.trap",  Int16Regs>;
defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;

// sust.b.1d.v4, .zero
defm SUST_B_1D_V4B8_ZERO  : SUST_1D_V4<"sust.b.1d.v4.b8.zero",  Int16Regs>;
defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;

// sust.p.1d.v4, .trap
defm SUST_P_1D_V4B8_TRAP  : SUST_1D_V4<"sust.p.1d.v4.b8.trap",  Int16Regs>;
defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4508
// Instruction shape for a scalar store to a 1D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as {$idx, $x}. The instruction has no outputs and an
// empty pattern list.
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
      []>;

// Produces the two surface-operand forms of SUST_1D_ARRAY_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
4518
// Scalar 1D-array surface stores, all instantiated from SUST_1D_ARRAY.
// The b8 and b16 element sizes both use Int16Regs for the stored value.

// sust.b.a1d, .clamp
defm SUST_B_1D_ARRAY_B8_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

// sust.b.a1d, .trap
defm SUST_B_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

// sust.b.a1d, .zero
defm SUST_B_1D_ARRAY_B8_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

// sust.p.a1d, .trap (no b64 form is defined for sust.p)
defm SUST_P_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4552
// Instruction shape for a two-component (v2) store to a 1D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r and $g.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as {$idx, $x}. The instruction has no outputs and an
// empty pattern list.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
      []>;

// Produces the two surface-operand forms of SUST_1D_ARRAY_V2_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
4563
// Two-component 1D-array surface stores, all instantiated from
// SUST_1D_ARRAY_V2. The b8 and b16 element sizes both use Int16Regs.

// sust.b.a1d.v2, .clamp
defm SUST_B_1D_ARRAY_V2B8_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

// sust.b.a1d.v2, .trap
defm SUST_B_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

// sust.b.a1d.v2, .zero
defm SUST_B_1D_ARRAY_V2B8_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

// sust.p.a1d.v2, .trap (no b64 form is defined for sust.p)
defm SUST_P_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4597
// Instruction shape for a four-component (v4) store to a 1D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r, $g, $b and $a.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as {$idx, $x}. The instruction has no outputs and an
// empty pattern list.
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Produces the two surface-operand forms of SUST_1D_ARRAY_V4_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
4608
// Four-component 1D-array surface stores, all instantiated from
// SUST_1D_ARRAY_V4. Only b8/b16/b32 element sizes are defined; b8 and b16
// share Int16Regs.

// sust.b.a1d.v4, .clamp
defm SUST_B_1D_ARRAY_V4B8_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

// sust.b.a1d.v4, .trap
defm SUST_B_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

// sust.b.a1d.v4, .zero
defm SUST_B_1D_ARRAY_V4B8_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

// sust.p.a1d.v4, .trap
defm SUST_P_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4636
// Instruction shape for a scalar 2D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as {$x, $y}. The instruction has no outputs and an
// empty pattern list.
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}
4646
// Scalar 2D surface stores, all instantiated from the SUST_2D multiclass.
// The b8 and b16 element sizes both use Int16Regs for the stored value.

// sust.b.2d, .clamp
defm SUST_B_2D_B8_CLAMP  : SUST_2D<"sust.b.2d.b8.clamp",  Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

// sust.b.2d, .trap
defm SUST_B_2D_B8_TRAP  : SUST_2D<"sust.b.2d.b8.trap",  Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

// sust.b.2d, .zero
defm SUST_B_2D_B8_ZERO  : SUST_2D<"sust.b.2d.b8.zero",  Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

// sust.p.2d, .trap (no b64 form is defined for sust.p)
defm SUST_P_2D_B8_TRAP  : SUST_2D<"sust.p.2d.b8.trap",  Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4665
// Instruction shape for a two-component (v2) 2D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r and $g.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as {$x, $y}. The instruction has no outputs and an
// empty pattern list.
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_V2_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}
4676
// Two-component 2D surface stores, all instantiated from SUST_2D_V2.
// The b8 and b16 element sizes both use Int16Regs for the stored values.

// sust.b.2d.v2, .clamp
defm SUST_B_2D_V2B8_CLAMP  : SUST_2D_V2<"sust.b.2d.v2.b8.clamp",  Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

// sust.b.2d.v2, .trap
defm SUST_B_2D_V2B8_TRAP  : SUST_2D_V2<"sust.b.2d.v2.b8.trap",  Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

// sust.b.2d.v2, .zero
defm SUST_B_2D_V2B8_ZERO  : SUST_2D_V2<"sust.b.2d.v2.b8.zero",  Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

// sust.p.2d.v2, .trap (no b64 form is defined for sust.p)
defm SUST_P_2D_V2B8_TRAP  : SUST_2D_V2<"sust.p.2d.v2.b8.trap",  Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4695
// Instruction shape for a four-component (v4) 2D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r, $g, $b and $a.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as {$x, $y}. The instruction has no outputs and an
// empty pattern list.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_V4_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4706
// Four-component 2D surface stores, all instantiated from SUST_2D_V4.
// Only b8/b16/b32 element sizes are defined; b8 and b16 share Int16Regs.

// sust.b.2d.v4, .clamp
defm SUST_B_2D_V4B8_CLAMP  : SUST_2D_V4<"sust.b.2d.v4.b8.clamp",  Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

// sust.b.2d.v4, .trap
defm SUST_B_2D_V4B8_TRAP  : SUST_2D_V4<"sust.b.2d.v4.b8.trap",  Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

// sust.b.2d.v4, .zero
defm SUST_B_2D_V4B8_ZERO  : SUST_2D_V4<"sust.b.2d.v4.b8.zero",  Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

// sust.p.2d.v4, .trap
defm SUST_P_2D_V4B8_TRAP  : SUST_2D_V4<"sust.p.2d.v4.b8.trap",  Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4722
// Instruction shape for a scalar store to a 2D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as the four-element list {$idx, $x, $y, $y}: $y is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// a2d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_ARRAY_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}
4733
// Scalar 2D-array surface stores, all instantiated from SUST_2D_ARRAY.
// The b8 and b16 element sizes both use Int16Regs for the stored value.

// sust.b.a2d, .clamp
defm SUST_B_2D_ARRAY_B8_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

// sust.b.a2d, .trap
defm SUST_B_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

// sust.b.a2d, .zero
defm SUST_B_2D_ARRAY_B8_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

// sust.p.a2d, .trap (no b64 form is defined for sust.p)
defm SUST_P_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4767
// Instruction shape for a two-component (v2) store to a 2D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r and $g.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as the four-element list {$idx, $x, $y, $y}: $y is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// a2d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_ARRAY_V2_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}
4778
// Two-component 2D-array surface stores, all instantiated from
// SUST_2D_ARRAY_V2. The b8 and b16 element sizes both use Int16Regs.

// sust.b.a2d.v2, .clamp
defm SUST_B_2D_ARRAY_V2B8_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

// sust.b.a2d.v2, .trap
defm SUST_B_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

// sust.b.a2d.v2, .zero
defm SUST_B_2D_ARRAY_V2B8_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

// sust.p.a2d.v2, .trap (no b64 form is defined for sust.p)
defm SUST_P_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4812
// Instruction shape for a four-component (v4) store to a 2D surface array.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r, $g, $b and $a.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            index/coordinate/value operands with !con.
// The address is printed as the four-element list {$idx, $x, $y, $y}: $y is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// a2d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst,
                 " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Produces the two surface-operand forms of SUST_2D_ARRAY_V4_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}
4823
// Four-component 2D-array surface stores, all instantiated from
// SUST_2D_ARRAY_V4. Only b8/b16/b32 element sizes are defined; b8 and b16
// share Int16Regs.

// sust.b.a2d.v4, .clamp
defm SUST_B_2D_ARRAY_V4B8_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

// sust.b.a2d.v4, .trap
defm SUST_B_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

// sust.b.a2d.v4, .zero
defm SUST_B_2D_ARRAY_V4B8_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

// sust.p.a2d.v4, .trap
defm SUST_P_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4851
// Instruction shape for a scalar 3D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as the four-element list {$x, $y, $z, $z}: $z is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// 3d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, intype:$r)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
      []>;

// Produces the two surface-operand forms of SUST_3D_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}
4862
// Scalar 3D surface stores, all instantiated from the SUST_3D multiclass.
// The b8 and b16 element sizes both use Int16Regs for the stored value.

// sust.b.3d, .clamp
defm SUST_B_3D_B8_CLAMP  : SUST_3D<"sust.b.3d.b8.clamp",  Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

// sust.b.3d, .trap
defm SUST_B_3D_B8_TRAP  : SUST_3D<"sust.b.3d.b8.trap",  Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

// sust.b.3d, .zero
defm SUST_B_3D_B8_ZERO  : SUST_3D<"sust.b.3d.b8.zero",  Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

// sust.p.3d, .trap (no b64 form is defined for sust.p)
defm SUST_P_3D_B8_TRAP  : SUST_3D<"sust.p.3d.b8.trap",  Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4881
// Instruction shape for a two-component (v2) 3D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r and $g.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as the four-element list {$x, $y, $z, $z}: $z is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// 3d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                intype:$r, intype:$g)),
      !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
      []>;

// Produces the two surface-operand forms of SUST_3D_V2_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}
4892
// Two-component 3D surface stores, all instantiated from SUST_3D_V2.
// The b8 and b16 element sizes both use Int16Regs for the stored values.

// sust.b.3d.v2, .clamp
defm SUST_B_3D_V2B8_CLAMP  : SUST_3D_V2<"sust.b.3d.v2.b8.clamp",  Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

// sust.b.3d.v2, .trap
defm SUST_B_3D_V2B8_TRAP  : SUST_3D_V2<"sust.b.3d.v2.b8.trap",  Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

// sust.b.3d.v2, .zero
defm SUST_B_3D_V2B8_ZERO  : SUST_3D_V2<"sust.b.3d.v2.b8.zero",  Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

// sust.p.3d.v2, .trap (no b64 form is defined for sust.p)
defm SUST_P_3D_V2B8_TRAP  : SUST_3D_V2<"sust.p.3d.v2.b8.trap",  Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4911
// Instruction shape for a four-component (v4) 3D surface store.
//   inst   - mnemonic text placed in front of the operand list.
//   intype - register class of the stored components $r, $g, $b and $a.
//   surf   - dag providing the surface operand $s; it is prepended to the
//            coordinate/value operands with !con.
// The address is printed as the four-element list {$x, $y, $z, $z}: $z is
// emitted twice to fill the last slot.
// NOTE(review): presumably the fourth element is padding required by the
// 3d coordinate vector - confirm against the PTX ISA.
// The instruction has no outputs and an empty pattern list.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<
      (outs),
      !con(surf,
           (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                intype:$r, intype:$g, intype:$b, intype:$a)),
      !strconcat(inst,
                 " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
      []>;

// Produces the two surface-operand forms of SUST_3D_V4_base:
//   _R - $s is an Int64Regs register,
//   _I - $s is an i64imm immediate.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}
4922
// Four-component 3D surface stores, all instantiated from SUST_3D_V4.
// Only b8/b16/b32 element sizes are defined; b8 and b16 share Int16Regs.

// sust.b.3d.v4, .clamp
defm SUST_B_3D_V4B8_CLAMP  : SUST_3D_V4<"sust.b.3d.v4.b8.clamp",  Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

// sust.b.3d.v4, .trap
defm SUST_B_3D_V4B8_TRAP  : SUST_3D_V4<"sust.b.3d.v4.b8.trap",  Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

// sust.b.3d.v4, .zero
defm SUST_B_3D_V4B8_ZERO  : SUST_3D_V4<"sust.b.3d.v4.b8.zero",  Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

// sust.p.3d.v4, .trap
defm SUST_P_3D_V4B8_TRAP  : SUST_3D_V4<"sust.p.3d.v4.b8.trap",  Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4938
4939}
4940
// Surface store instruction patterns.
// These are written as standalone Pat records instead of being attached to the
// instruction definitions, because TableGen reports type errors when the
// patterns are placed on the instructions directly (the cause is not known).
4944
4945// .clamp variant
// Selection patterns for the int_nvvm_sust_b_1d_*_clamp intrinsics. Each one
// maps to the register-surface (_R) form of the matching SUST_B_1D_*_CLAMP
// instruction and forwards the operands in the same order.

// Scalar stores.
def : Pat<
  (int_nvvm_sust_b_1d_i8_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_clamp Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_clamp Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_clamp Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

// Two-component stores.
def : Pat<
  (int_nvvm_sust_b_1d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<
  (int_nvvm_sust_b_1d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4999
5000
5001
// Selection patterns for the int_nvvm_sust_b_1d_array_*_clamp intrinsics.
// Each one maps to the register-surface (_R) form of the matching
// SUST_B_1D_ARRAY_*_CLAMP instruction and forwards the operands in the same
// order; $l binds the operand in the instruction's $idx position.

// Scalar stores.
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

// Two-component stores.
def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_clamp
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5059
5060
5061
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics. Each one
// maps to the register-surface (_R) form of the matching SUST_B_2D_*_CLAMP
// instruction and forwards the operands in the same order.

// Scalar stores.
def : Pat<
  (int_nvvm_sust_b_2d_i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
  (SUST_B_2D_B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
  (SUST_B_2D_B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
  (SUST_B_2D_B64_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

// Two-component stores.
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int64Regs:$r, Int64Regs:$g)>;

// Four-component stores.
def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP_R
     Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
     Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5119
5120
5121
// 2D-array sust.b selection patterns, clamp flavour.  Each pattern matches
// an int_nvvm_sust_b_2d_array_*_clamp intrinsic whose operands are all in
// registers and selects the SUST_B_2D_ARRAY_*_CLAMP_R instruction of the
// same shape, forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $l the
// array layer, $x/$y the coordinates and $r/$g/$b/$a the stored
// components -- confirm against the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5192
5193
5194
// 3D sust.b selection patterns, clamp flavour.  Each pattern matches an
// int_nvvm_sust_b_3d_*_clamp intrinsic whose operands are all in registers
// and selects the SUST_B_3D_*_CLAMP_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x/$y/$z
// the coordinates and $r/$g/$b/$a the stored components -- confirm against
// the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5271
5272
// .trap variant
//
// 1D sust.b selection patterns, trap flavour.  Each pattern matches an
// int_nvvm_sust_b_1d_*_trap intrinsic whose operands are all in registers
// and selects the SUST_B_1D_*_TRAP_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x the
// coordinate and $r/$g/$b/$a the stored components -- confirm against the
// instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_1d_i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5327
5328
5329
// 1D-array sust.b selection patterns, trap flavour.  Each pattern matches
// an int_nvvm_sust_b_1d_array_*_trap intrinsic whose operands are all in
// registers and selects the SUST_B_1D_ARRAY_*_TRAP_R instruction of the
// same shape, forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $l the
// array layer, $x the coordinate and $r/$g/$b/$a the stored components --
// confirm against the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5387
5388
5389
// 2D sust.b selection patterns, trap flavour.  Each pattern matches an
// int_nvvm_sust_b_2d_*_trap intrinsic whose operands are all in registers
// and selects the SUST_B_2D_*_TRAP_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x/$y the
// coordinates and $r/$g/$b/$a the stored components -- confirm against the
// instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5447
5448
5449
// 2D-array sust.b selection patterns, trap flavour.  Each pattern matches
// an int_nvvm_sust_b_2d_array_*_trap intrinsic whose operands are all in
// registers and selects the SUST_B_2D_ARRAY_*_TRAP_R instruction of the
// same shape, forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $l the
// array layer, $x/$y the coordinates and $r/$g/$b/$a the stored
// components -- confirm against the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5520
5521
5522
// 3D sust.b selection patterns, trap flavour.  Each pattern matches an
// int_nvvm_sust_b_3d_*_trap intrinsic whose operands are all in registers
// and selects the SUST_B_3D_*_TRAP_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x/$y/$z
// the coordinates and $r/$g/$b/$a the stored components -- confirm against
// the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5599
5600
// .zero variant
//
// 1D sust.b selection patterns, zero flavour.  Each pattern matches an
// int_nvvm_sust_b_1d_*_zero intrinsic whose operands are all in registers
// and selects the SUST_B_1D_*_ZERO_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x the
// coordinate and $r/$g/$b/$a the stored components -- confirm against the
// instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_1d_i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5655
5656
5657
// 1D-array sust.b selection patterns, zero flavour.  Each pattern matches
// an int_nvvm_sust_b_1d_array_*_zero intrinsic whose operands are all in
// registers and selects the SUST_B_1D_ARRAY_*_ZERO_R instruction of the
// same shape, forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $l the
// array layer, $x the coordinate and $r/$g/$b/$a the stored components --
// confirm against the instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5715
5716
5717
// 2D sust.b selection patterns, zero flavour.  Each pattern matches an
// int_nvvm_sust_b_2d_*_zero intrinsic whose operands are all in registers
// and selects the SUST_B_2D_*_ZERO_R instruction of the same shape,
// forwarding the operands in their original order.
// 8-bit components travel in Int16Regs, exactly like 16-bit ones.
// NOTE(review): operand names suggest $s is the surface handle, $x/$y the
// coordinates and $r/$g/$b/$a the stored components -- confirm against the
// instruction definitions.

// One component.
def : Pat<(int_nvvm_sust_b_2d_i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

// Two components.
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

// Four components.
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5775
5776
5777
// Selection patterns for int_nvvm_sust_b_2d_array_*_zero.
// Each intrinsic is rewritten to the like-named SUST_B_2D_ARRAY_*_ZERO_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $l/$x/$y (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5848
5849
5850
// Selection patterns for int_nvvm_sust_b_3d_*_zero.
// Each intrinsic is rewritten to the like-named SUST_B_3D_*_ZERO_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $x/$y/$z (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_b_3d_i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_B_3D_B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_B_3D_B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5927
5928
5929
5930
// Selection patterns for int_nvvm_sust_p_1d_*_trap.
// Each intrinsic is rewritten to the like-named SUST_P_1D_*_TRAP_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $x (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_p_1d_i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_P_1D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5975
5976
5977
// Selection patterns for int_nvvm_sust_p_1d_array_*_trap.
// Each intrinsic is rewritten to the like-named SUST_P_1D_ARRAY_*_TRAP_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $l/$x (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6025
6026
6027
// Selection patterns for int_nvvm_sust_p_2d_*_trap.
// Each intrinsic is rewritten to the like-named SUST_P_2D_*_TRAP_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $x/$y (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6075
6076
6077
// Selection patterns for int_nvvm_sust_p_2d_array_*_trap.
// Each intrinsic is rewritten to the like-named SUST_P_2D_ARRAY_*_TRAP_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $l/$x/$y (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6136
6137
6138
// Selection patterns for int_nvvm_sust_p_3d_*_trap.
// Each intrinsic is rewritten to the like-named SUST_P_3D_*_TRAP_R
// instruction; operands are forwarded unchanged and in the same order:
// $s (Int64Regs), $x/$y/$z (Int32Regs), then the data registers.
// Note that the 8-bit variants carry their data in Int16Regs.
def : Pat<(int_nvvm_sust_p_3d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6201
6202//-----------------------------------
6203// Read Special Registers
6204//-----------------------------------
6205
// Instruction that copies a special register into a 64-bit register.
//   regname - register name as printed after the '%' sigil in the asm string.
//   intop   - zero-operand intrinsic this instruction is selected for.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>;
6210
// Instruction that copies a special register into a 32-bit register.
//   regname - register name as printed after the '%' sigil in the asm string.
//   intop   - zero-operand intrinsic this instruction is selected for.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>;
6215
6216// TODO Add read vector-version of special registers
6217
// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
6226
// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
6235
// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6242
// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
6251
// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
6260
// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6267
// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6278
// %clock is read into a 32-bit register, %clock64 into a 64-bit one.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6283
// %pm0 .. %pm3
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6288
// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
// prints a '%' in front of the name and so cannot emit the WARP_SZ constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6294
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
//   r  - the WMMA_REGS record describing the fragment (geometry, fragment
//        name and PTX element type are forwarded to the base class).
//   op - name of the operation the fragment is used with; only consulted when
//        choosing Predicates (compared against "mma" and "ldmatrix").
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  // There is intentionally no default arm: an element type that is not listed
  // here leaves the !cond without a matching case.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // !cond yields the value of the first arm whose condition holds, so the
  // order of the arms below is significant wherever conditions overlap.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // Any non-f64 element type @ m8n8k4. The f64 case is handled by the
    // dedicated arm near the top of this list.
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
6403
// Turns an instruction-operand dag into the dag that matches a call to the
// given intrinsic: the `ins` operator is replaced with the intrinsic itself
// and each memory operand class with its address-matching counterpart
// (MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar).
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Resulting pattern; every element of Ins is run through the substitutions.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6413
// PatFrag flavour of BuildPatternI: turns an instruction-operand dag into the
// dag that matches the given PatFrag. The `ins` operator is replaced with the
// PatFrag and each memory operand class with its address-matching counterpart
// (MEMri -> ADDRri, MEMri64 -> ADDRri64, imem -> ADDRvar).
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Resulting pattern; every element of Ins is run through the substitutions.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6423
// Base class shared by all MMA instructions; it only prepares the fields used
// to build selection patterns. Operand lists, asm string and pattern are left
// as placeholders here.
//   _Intr - name of the intrinsic record, resolved with !cast.
//   _Args - list of `ins` dags that are concatenated into one argument dag.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // The intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All argument dags folded, left to right, into a single (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, next, !con(acc, next));
  // Default pattern: (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<Intr, Args>.ret;
}
6433
//
// wmma.load.<frag>.sync${ptx:aligned}.<layout>.<geom><space>.<elt type>
//     {regs}, [src][, ldm];
//
//   Frag       - fragment being loaded; supplies the output registers,
//                predicates and the frag/geom/type pieces of the asm string.
//   Layout     - layout string placed after ".sync${ptx:aligned}.".
//   Space      - address-space suffix appended verbatim after the geometry;
//                ".shared" and ".global" select the matching AS_match
//                predicate, anything else selects the generic one.
//   WithStride - when set, adds the Int32Regs:$ldm operand.
//   SrcOp      - operand class used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load." # Frag.frag
                  # ".sync${ptx:aligned}." # Layout
                  # "." # Frag.geom # Space
                  # "." # Frag.ptx_elt_type # " \t" # Frag.regstring
                  # ", [$src]" # !if(WithStride, ", $ldm", "")
                  # ";";

  // Load/store intrinsics are overloaded on the pointer's address space, so
  // the intrinsic is wrapped in a PatFrag restricted to one address space.
  // PFOperands mirrors Args in shape, but is spelled (ops node:$name, ...).
  dag PFOperands = !if(WithStride,
                       (ops node:$src, node:$ldm),
                       (ops node:$src));
  dag PFOperandsIntr = !if(WithStride,
                           (Intr node:$src, node:$ldm),
                           (Intr node:$src));
  // PatFrag that only matches the requested address space.
  PatFrag IntrFrag = PatFrag<PFOperands, PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
}
6473
//
// wmma.store.d.sync${ptx:aligned}.<layout>.<geom><space>.<type>
//     [$dst],<regs>{, $ldm};
//
// Template arguments:
//   Frag       - register info of the stored fragment; supplies geom,
//                ptx_elt_type, regstring, regs, reg_names, Ins and Predicates.
//   Layout     - layout name printed after the ".sync" part.
//   Space      - address-space suffix. ".shared" and ".global" pick the
//                matching AS_match predicate; any other value picks generic.
//   WithStride - when set, an additional Int32Regs:$ldm operand is accepted
//                and printed.
//   DstOp      - operand class used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
    : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
                 [!con((ins DstOp:$dst),
                       Frag.Ins,
                       !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
      Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the pointer's address space,
  // so the match has to go through a PatFrag that accepts only one space.
  // PFOperands mirrors the shape of Args, spelled as (ops node:$name, ...):
  // the destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));

  // PatFrag restricted to the address space named by Space. Its fragment is
  // PFOperands with the 'ops' operator swapped for the intrinsic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(item, PFOperands, !subst(ops, Intr, item)),
                             !cond(!eq(Space, ".global"): AS_match.global,
                                   !eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;

  // Replace the default pattern with the address-space constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store produces no results; the trailing $ptx operand is supplied by the
  // selection pattern.
  let OutOperandList = (outs);
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.store.d.sync${ptx:aligned}." # Layout
                  # "." # Frag.geom # Space # "." # Frag.ptx_elt_type
                  # " \t[$dst]," # Frag.regstring
                  # !if(WithStride, ", $ldm;", ";");
}
6513
// Instantiate every supported wmma.load / wmma.store variant and collect the
// resulting records in MMA_LDSTs. The variants are the cross product of
// layout, stride presence, address-space suffix, address operand class and
// fragment, filtered through NVVM_WMMA_LDST_SUPPORTED.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach mat_layout = ["row", "col"] in
  foreach with_stride = [false, true] in
  foreach addr_space = [".global", ".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads.
    foreach ld_frag = NVVM_MMA_OPS.all_ld_ops in
      if NVVM_WMMA_LDST_SUPPORTED<ld_frag, mat_layout>.ret then
        def : WMMA_LOAD<WMMA_REGINFO<ld_frag, "load">, mat_layout, addr_space,
                        with_stride, addr_op>;
    // Stores.
    foreach st_frag = NVVM_MMA_OPS.all_st_ops in
      if NVVM_WMMA_LDST_SUPPORTED<st_frag, mat_layout>.ret then
        def : WMMA_STORE_D<WMMA_REGINFO<st_frag, "store">, mat_layout,
                           addr_space, with_stride, addr_op>;
  } // addr_op / addr_space / with_stride / mat_layout
} // defset
6531
// Predicate list for an MMA operation. The result always contains the
// predicates of the A fragment; when the b1 operation is ".and.popc" it is
// extended with hasSM80 and hasPTX71.
//   FragA - register info of the A fragment.
//   b1op  - b1 operation suffix.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  WMMA_REGINFO Frag = FragA;
  string Op = b1op;
  list<Predicate> ret = !if(!eq(b1op, ".and.popc"),
                            !listconcat(FragA.Predicates, [hasSM80, hasPTX71]),
                            FragA.Predicates);
}
// WMMA.MMA
//
// wmma.mma<b1op>.sync${ptx:aligned}.<alayout>.<blayout>.<geom>[.<rnd>]
//     <types>[.satfinite] <D regs>, <A regs>, <B regs>, <C regs>;
//
// Template arguments:
//   FragA..FragD     - register info of the A, B, C and D fragments.
//   ALayout, BLayout - layout names of the A and B matrices.
//   Satfinite        - non-zero appends ".satfinite".
//   rnd              - rounding suffix without the leading dot; "" for none.
//   b1op             - b1 operation suffix placed right after "wmma.mma".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
    : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires appears to have no effect on an Instruction that carries no
      // Pattern. It is still set here so that the Pat<> built from this record
      // further down can pick the predicates up.
      Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;

  // When A is f16 only the D and C element types are printed; otherwise all
  // four are printed in D, A, B, C order.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type,
                        "." # FragD.ptx_elt_type # "." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type # "." # FragC.ptx_elt_type);

  let AsmString = "wmma.mma" # b1op # ".sync${ptx:aligned}."
                  # ALayout # "." # BLayout # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "")
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring # ";";
}
6575
// Instantiate every supported wmma.mma variant and collect the records in
// WMMAs. Each entry of all_wmma_ops is indexed as [0]=A, [1]=B, [2]=C, [3]=D
// fragment; the b1 operations to generate are taken from NVVM_MMA_B1OPS for
// that entry.
defset list<WMMA_INSTR> WMMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach satfinite = [0, 1] in
  foreach rnd_mode = ["", "rn", "rz", "rm", "rp"] in
  foreach op_frags = NVVM_MMA_OPS.all_wmma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<op_frags>.ret in
    if NVVM_WMMA_SUPPORTED<op_frags, a_layout, b_layout, satfinite, rnd_mode>.ret then
      def : WMMA_MMA<WMMA_REGINFO<op_frags[0], "wmma.mma">,
                     WMMA_REGINFO<op_frags[1], "wmma.mma">,
                     WMMA_REGINFO<op_frags[2], "wmma.mma">,
                     WMMA_REGINFO<op_frags[3], "wmma.mma">,
                     a_layout, b_layout, satfinite, rnd_mode, bit_op>;
} // defset
6597
// MMA
//
// mma.sync.aligned.<geom>.<alayout>.<blayout>[.satfinite]
//     .<D type>.<A type>.<B type>.<C type><b1op>
//     <D regs>, <A regs>, <B regs>, <C regs>;
//
// Template arguments:
//   FragA..FragD     - register info of the A, B, C and D fragments.
//   ALayout, BLayout - layout names of the A and B matrices.
//   Satfinite        - non-zero appends ".satfinite".
//   b1op             - b1 operation suffix printed after the type list.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
    : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires appears to have no effect on an Instruction that carries no
      // Pattern. It is still set here so that the Pat<> built from this record
      // further down can pick the predicates up.
      Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;

  // Element types are always printed in D, A, B, C order.
  string TypeList = !strconcat(".", FragD.ptx_elt_type,
                               ".", FragA.ptx_elt_type,
                               ".", FragB.ptx_elt_type,
                               ".", FragC.ptx_elt_type);

  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList # b1op
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring # ";";
}
6625
// Instantiate every supported mma variant and collect the records in MMAs.
// Each entry of all_mma_ops is indexed as [0]=A, [1]=B, [2]=C, [3]=D fragment;
// the b1 operations to generate are taken from NVVM_MMA_B1OPS for that entry.
defset list<WMMA_INSTR> MMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach satfinite = [0, 1] in
  foreach op_frags = NVVM_MMA_OPS.all_mma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<op_frags>.ret in
    if NVVM_MMA_SUPPORTED<op_frags, a_layout, b_layout, satfinite>.ret then
      def : MMA<WMMA_REGINFO<op_frags[0], "mma">,
                WMMA_REGINFO<op_frags[1], "mma">,
                WMMA_REGINFO<op_frags[2], "mma">,
                WMMA_REGINFO<op_frags[3], "mma">,
                a_layout, b_layout, satfinite, bit_op>;
} // defset
6645
//
// ldmatrix.sync.aligned.<geom>.<frag>[.trans][.shared].<type> <regs>, [$src];
//
// Template arguments:
//   Frag       - register info of the loaded fragment; supplies geom, frag,
//                ptx_elt_type, regstring, Outs and Predicates.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - address-space suffix. ".shared" picks AS_match.shared; any
//                other value picks AS_match.generic.
//   SrcOp      - operand class used for the $src address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
    : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
      Requires<Frag.Predicates> {
  // PatFrag that matches the intrinsic only for the selected address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"),
                                 AS_match.shared,
                                 AS_match.generic)>;

  // Replace the default pattern with the address-space constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The trailing $ptx operand is supplied by the selection pattern.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = !strconcat("ldmatrix.sync.aligned.",
                             Frag.geom,
                             ".", Frag.frag,
                             !if(Transposed, ".trans", ""),
                             Space,
                             ".", Frag.ptx_elt_type,
                             " ", Frag.regstring, ", [$src];");
}
6670
// Instantiate every supported ldmatrix variant and collect the records in
// LDMATRIXs: transposed or not, .shared or generic address space, each address
// operand class, each fragment accepted by NVVM_LDMATRIX_SUPPORTED.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach is_trans = [false, true] in
  foreach addr_space = [".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
  foreach ld_frag = NVVM_MMA_OPS.all_ldmatrix_ops in
    if NVVM_LDMATRIX_SUPPORTED<ld_frag>.ret then
      def : LDMATRIX<WMMA_REGINFO<ld_frag, "ldmatrix">, is_trans, addr_space,
                     addr_op>;
} // defset
6684
// Selection pattern for one MMA-family instruction record `wi`.
// The source side is the record's pre-built IntrinsicPattern. Building the
// non-flat result dag is awkward because a dag node cannot be !subst'ed with
// another dag, so it is assembled in two steps: first !foreach swaps the 'ins'
// operator of wi.Args for the instruction record itself, and only after that
// is ptx.version appended as the last operand.
class MMA_PAT<WMMA_INSTR wi>
    : Pat<wi.IntrinsicPattern,
          !con(!foreach(item, wi.Args, !subst(ins, wi, item)),
               (wi ptx.version))>,
      Requires<wi.Predicates>;
6693
// Emit one intrinsic->instruction pattern for every record collected in the
// MMAs, WMMAs, MMA_LDSTs and LDMATRIXs defsets.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<inst>;
}
6697