xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision 1165fc9a526630487a1feb63daef65c5aee1a583)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9def immFloat0 : PatLeaf<(fpimm), [{
10    float f = (float)N->getValueAPF().convertToFloat();
11    return (f==0.0f);
12}]>;
13
14def immFloat1 : PatLeaf<(fpimm), [{
15    float f = (float)N->getValueAPF().convertToFloat();
16    return (f==1.0f);
17}]>;
18
19def immDouble0 : PatLeaf<(fpimm), [{
20    double d = (double)N->getValueAPF().convertToDouble();
21    return (d==0.0);
22}]>;
23
24def immDouble1 : PatLeaf<(fpimm), [{
25    double d = (double)N->getValueAPF().convertToDouble();
26    return (d==1.0);
27}]>;
28
29def AS_match {
30  code generic = [{
31   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
32  }];
33  code shared = [{
34   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
35  }];
36  code global = [{
37   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
38  }];
39}
40
41// A node that will be replaced with the current PTX version.
42class PTX {
43  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
44    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
45  }]>;
46  // (i32 0) will be XForm'ed to the currently used PTX version.
47  dag version = (PTXVerXform (i32 0));
48}
49def ptx : PTX;
50
51// Generates list of n sequential register names.
52// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
53class RegSeq<int n, string prefix> {
54  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
55                                        [prefix # !sub(n, 1)]),
56                            []);
57}
58
59class THREADMASK_INFO<bit sync> {
60  list<bit> ret = !if(sync, [0, 1], [0]);
61}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
67def INT_BARRIER0 : NVPTXInst<(outs), (ins),
68                  "bar.sync \t0;",
69      [(int_nvvm_barrier0)]>;
70def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
71                  "bar.sync \t$src1;",
72      [(int_nvvm_barrier_n Int32Regs:$src1)]>;
73def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
74                  "bar.sync \t$src1, $src2;",
75      [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
76def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
77  !strconcat("{{ \n\t",
78             ".reg .pred \t%p1; \n\t",
79             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
80             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
81             "}}"),
82      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
83def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
84  !strconcat("{{ \n\t",
85             ".reg .pred \t%p1; \n\t",
86             ".reg .pred \t%p2; \n\t",
87             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
88             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
89             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
90             "}}"),
91      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
92def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
93  !strconcat("{{ \n\t",
94             ".reg .pred \t%p1; \n\t",
95             ".reg .pred \t%p2; \n\t",
96             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
97             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
98             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
99             "}}"),
100      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
102def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
103                             [(int_nvvm_bar_sync imm:$i)]>;
104
105def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
106                             [(int_nvvm_bar_warp_sync imm:$i)]>,
107        Requires<[hasPTX60, hasSM30]>;
108def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
109                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
110        Requires<[hasPTX60, hasSM30]>;
111
112def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
113                                   [(int_nvvm_barrier_sync imm:$i)]>,
114        Requires<[hasPTX60, hasSM30]>;
115def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
116                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
117        Requires<[hasPTX60, hasSM30]>;
118
119def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
120                 "barrier.sync \t$id, $cnt;",
121                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
122        Requires<[hasPTX60, hasSM30]>;
123def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
124                 "barrier.sync \t$id, $cnt;",
125                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
126        Requires<[hasPTX60, hasSM30]>;
127def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
128                 "barrier.sync \t$id, $cnt;",
129                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
130        Requires<[hasPTX60, hasSM30]>;
131def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
132                 "barrier.sync \t$id, $cnt;",
133                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
134        Requires<[hasPTX60, hasSM30]>;
135
136class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
137                 bit offset_imm, bit mask_imm, bit threadmask_imm>
138      : NVPTXInst<(outs), (ins), "?", []> {
139  NVPTXRegClass rc = !cond(
140    !eq(reg, "i32"): Int32Regs,
141    !eq(reg, "f32"): Float32Regs);
142  string IntrName = "int_nvvm_shfl_"
143                    # !if(sync, "sync_", "")
144                    # mode
145                    # "_" # reg
146                    # !if(return_pred, "p", "");
147  Intrinsic Intr = !cast<Intrinsic>(IntrName);
148  let InOperandList = !con(
149    !if(sync,
150        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
151        (ins)),
152    (ins rc:$src),
153    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
154    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
155    );
156  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
157  let AsmString = "shfl."
158     # !if(sync, "sync.", "")
159     # mode # ".b32\t"
160     # "$dst"
161     # !if(return_pred, "|$pred", "") # ", "
162     # "$src, $offset, $mask"
163     # !if(sync, ", $threadmask", "")
164     # ";"
165     ;
166  let Pattern = [!con(
167      !foreach(tmp, OutOperandList,
168             !subst(outs, set,
169             !subst(i32imm, imm, tmp))),
170      (set !foreach(tmp, InOperandList,
171             !subst(ins, Intr,
172             !subst(i32imm, imm, tmp))))
173  )];
174}
175
176foreach sync = [false, true] in {
177  foreach mode = ["up", "down", "bfly", "idx"] in {
178    foreach regclass = ["i32", "f32"] in {
179      foreach return_pred = [false, true] in {
180        foreach offset_imm = [false, true] in {
181          foreach mask_imm = [false, true] in {
182            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
183              def : SHFL_INSTR<sync, mode, regclass, return_pred,
184                               offset_imm, mask_imm, threadmask_imm>,
185                    Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
186            }
187          }
188        }
189      }
190    }
191  }
192}
193
194// vote.{all,any,uni,ballot}
195multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
196  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
197              "vote." # mode # " \t$dest, $pred;",
198              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
199        Requires<[hasPTX60, hasSM30]>;
200}
201
202defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
203defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
204defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
205defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
206
207// vote.sync.{all,any,uni,ballot}
208multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
209  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
210              "vote.sync." # mode # " \t$dest, $pred, $mask;",
211              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
212          Requires<[hasPTX60, hasSM30]>;
213  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
214              "vote.sync." # mode #" \t$dest, $pred, $mask;",
215              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
216          Requires<[hasPTX60, hasSM30]>;
217}
218
219defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
220defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
221defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
222defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
224multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
225                          Operand ImmOp> {
226  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
227              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
228              [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
229           Requires<[hasPTX60, hasSM70]>;
230  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
231              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
232              [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
233           Requires<[hasPTX60, hasSM70]>;
234  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
235              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
236              [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
237           Requires<[hasPTX60, hasSM70]>;
238  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
239              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
240              [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
241           Requires<[hasPTX60, hasSM70]>;
242}
243
244defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
245                                        i32imm>;
246defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
247                                        i64imm>;
248
249multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
250                          Operand ImmOp> {
251  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
252                     (ins i32imm:$mask, ImmOp:$value),
253              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
254              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
255           Requires<[hasPTX60, hasSM70]>;
256  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
257                     (ins Int32Regs:$mask, ImmOp:$value),
258              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
259              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
260           Requires<[hasPTX60, hasSM70]>;
261  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
262                     (ins i32imm:$mask, regclass:$value),
263              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
264              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
265           Requires<[hasPTX60, hasSM70]>;
266  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
267                     (ins Int32Regs:$mask, regclass:$value),
268              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
269              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
270           Requires<[hasPTX60, hasSM70]>;
271}
272defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
273                                         i32imm>;
274defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
275                                         i64imm>;
276
277multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
278  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
279          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
280          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
281        Requires<[hasPTX70, hasSM80]>;
282}
283
284defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
285defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
286defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
287defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
288defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
289defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
290defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
291defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;
292
293} // isConvergent = true
294
295//-----------------------------------
296// Explicit Memory Fence Functions
297//-----------------------------------
298class MEMBAR<string StrOp, Intrinsic IntOP> :
299              NVPTXInst<(outs), (ins),
300            StrOp, [(IntOP)]>;
301
302def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
303def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
304def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
305
306
307//-----------------------------------
308// Async Copy Functions
309//-----------------------------------
310
311multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
312  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
313            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
314            [(Intrin Int32Regs:$addr)]>,
315    Requires<[hasPTX70, hasSM80]>;
316  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
317            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
318            [(Intrin Int64Regs:$addr)]>,
319    Requires<[hasPTX70, hasSM80]>;
320}
321
322defm CP_ASYNC_MBARRIER_ARRIVE :
323  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
324defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
325  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
326defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
327  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
328defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
329  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
330
331multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
332  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
333            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
334            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
335    Requires<[hasPTX70, hasSM80]>;
336  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
337            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
338            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
339    Requires<[hasPTX70, hasSM80]>;
340}
341
342defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
343  CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
344
345defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
346  CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
347
348defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
349  CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
350
351multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
352  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
353            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
354            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
355    Requires<[hasPTX70, hasSM80]>;
356  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
357            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
358            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
359    Requires<[hasPTX70, hasSM80]>;
360}
361
362defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
363  CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
364
365def CP_ASYNC_COMMIT_GROUP :
366  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
367  Requires<[hasPTX70, hasSM80]>;
368
369def CP_ASYNC_WAIT_GROUP :
370  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
371  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
372  Requires<[hasPTX70, hasSM80]>;
373
374def CP_ASYNC_WAIT_ALL :
375  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
376  [(int_nvvm_cp_async_wait_all)]>,
377  Requires<[hasPTX70, hasSM80]>;
378
379//-----------------------------------
380// MBarrier Functions
381//-----------------------------------
382
383multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
384  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
385           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
386    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
387    Requires<[hasPTX70, hasSM80]>;
388  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
389           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
390    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
391    Requires<[hasPTX70, hasSM80]>;
392}
393
394defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
395defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
396                                          int_nvvm_mbarrier_init_shared>;
397
398multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
399  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
400           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
401    [(Intrin Int32Regs:$addr)]>,
402    Requires<[hasPTX70, hasSM80]>;
403  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
404           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
405    [(Intrin Int64Regs:$addr)]>,
406    Requires<[hasPTX70, hasSM80]>;
407}
408
409defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
410defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
411                                            int_nvvm_mbarrier_inval_shared>;
412
413multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
414  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
415           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
416    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
417    Requires<[hasPTX70, hasSM80]>;
418  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
419           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
420    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
421    Requires<[hasPTX70, hasSM80]>;
422}
423
424defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
425defm MBARRIER_ARRIVE_SHARED :
426  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
427
428multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
429  def _32 : NVPTXInst<(outs Int64Regs:$state),
430           (ins Int32Regs:$addr, Int32Regs:$count),
431           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
432                      ".b64 $state, [$addr], $count;"),
433    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
434    Requires<[hasPTX70, hasSM80]>;
435  def _64 : NVPTXInst<(outs Int64Regs:$state),
436           (ins Int64Regs:$addr, Int32Regs:$count),
437           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
438                      ".b64 $state, [$addr], $count;"),
439    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
440    Requires<[hasPTX70, hasSM80]>;
441}
442
443defm MBARRIER_ARRIVE_NOCOMPLETE :
444  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
445defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
446  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
447
448multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
449  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
450           !strconcat("mbarrier.arrive_drop", AddrSpace,
451                      ".b64 $state, [$addr];"),
452           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
453    Requires<[hasPTX70, hasSM80]>;
454  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
455           !strconcat("mbarrier.arrive_drop", AddrSpace,
456                      ".b64 $state, [$addr];"),
457           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
458    Requires<[hasPTX70, hasSM80]>;
459}
460
461defm MBARRIER_ARRIVE_DROP :
462  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
463defm MBARRIER_ARRIVE_DROP_SHARED :
464  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
465
466multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
467  def _32 : NVPTXInst<(outs Int64Regs:$state),
468           (ins Int32Regs:$addr, Int32Regs:$count),
469           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
470                      ".b64 $state, [$addr], $count;"),
471           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
472    Requires<[hasPTX70, hasSM80]>;
473  def _64 : NVPTXInst<(outs Int64Regs:$state),
474           (ins Int64Regs:$addr, Int32Regs:$count),
475           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
476                      ".b64 $state, [$addr], $count;"),
477           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
478    Requires<[hasPTX70, hasSM80]>;
479}
480
481defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
482  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
483defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
484  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
485                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
486
487multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
488  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
489           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
490           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
491    Requires<[hasPTX70, hasSM80]>;
492  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
493           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
494           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
495    Requires<[hasPTX70, hasSM80]>;
496}
497
498defm MBARRIER_TEST_WAIT :
499  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
500defm MBARRIER_TEST_WAIT_SHARED :
501  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
502
503class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
504           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
505           "mbarrier.pending_count.b64 $res, $state;",
506           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
507    Requires<[hasPTX70, hasSM80]>;
508
509def MBARRIER_PENDING_COUNT :
510  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
511
512//-----------------------------------
513// Math Functions
514//-----------------------------------
515
516// Map min(1.0, max(0.0, x)) to sat(x)
517// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
518// NaN
519// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
520// Same story for fmax, fmin.
521
522def : Pat<(int_nvvm_fmin_f immFloat1,
523            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
524          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
525def : Pat<(int_nvvm_fmin_f immFloat1,
526            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
527          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
528def : Pat<(int_nvvm_fmin_f
529            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
530          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
531def : Pat<(int_nvvm_fmin_f
532            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
533          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
534
535def : Pat<(int_nvvm_fmin_d immDouble1,
536            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
537          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
538def : Pat<(int_nvvm_fmin_d immDouble1,
539            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
540          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
541def : Pat<(int_nvvm_fmin_d
542            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
543          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
544def : Pat<(int_nvvm_fmin_d
545            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
546          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
547
548
549// We need a full string for OpcStr here because we need to deal with case like
550// INT_PTX_RECIP.
551class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
552  NVPTXRegClass src_regclass, Intrinsic IntOP>
553            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
554            OpcStr,
555        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
556
557// We need a full string for OpcStr here because we need to deal with the case
558// like INT_PTX_NATIVE_POWR_F.
559class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
560  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
561            : NVPTXInst<(outs t_regclass:$dst),
562              (ins s0_regclass:$src0, s1_regclass:$src1),
563            OpcStr,
564        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
565
566class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
567  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
568  NVPTXRegClass s2_regclass, Intrinsic IntOP>
569            : NVPTXInst<(outs t_regclass:$dst),
570              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
571            OpcStr,
572        [(set t_regclass:$dst,
573          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;
574
575//
576// MISC
577//
578
579def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
580  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
581
582//
583// Min Max
584//
585
586def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
587  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
588def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
589  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
590
591def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
592  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
593def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
594  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
595
596def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
597  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
598def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
599  Float64Regs, Float64Regs, int_nvvm_fmax_d>;
600
601
602//
603// Multiplication
604//
605
606def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
607  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
608def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
609  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
610
611def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
612  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
613def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
614  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
615
616def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
617  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
618def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
619  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
620def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
621  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
622def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
623  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
624def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
625  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
626def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
627  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
628def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
629  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
630def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
631  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
632
633def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
634  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
635def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
636  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
637def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
638  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
639def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
640  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
641
642def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
643  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
644def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
645  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
646
647//
648// Div
649//
650
651def INT_NVVM_DIV_APPROX_FTZ_F
652  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
653    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
654def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
655  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
656
657def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
658  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
659def INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
660  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
661def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
662  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
663def INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
664  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
665def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
666  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
667def INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
668  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
669def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
670  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
671def INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
672  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
673
674def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
675  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
676def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
677  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
678def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
679  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
680def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
681  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
682
683//
684// Sad
685//
686
687def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
688  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
689def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
690  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
691
692//
693// Floor  Ceil
694//
695
696def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
697          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
698def : Pat<(int_nvvm_floor_f Float32Regs:$a),
699          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
700def : Pat<(int_nvvm_floor_d Float64Regs:$a),
701          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
702
703def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
704          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
705def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
706          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
707def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
708          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
709
710//
711// Abs
712//
713
714def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
715  Float32Regs, int_nvvm_fabs_ftz_f>;
716def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
717  Float32Regs, int_nvvm_fabs_f>;
718
719def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
720  Float64Regs, int_nvvm_fabs_d>;
721
722//
723// Round
724//
725
726def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
727          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
728def : Pat<(int_nvvm_round_f Float32Regs:$a),
729          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
730def : Pat<(int_nvvm_round_d Float64Regs:$a),
731          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
732
733//
734// Trunc
735//
736
737def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
738          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
739def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
740          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
741def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
742          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
743
744//
745// Saturate
746//
747
748def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
749          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
750def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
751          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
752def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
753          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
754
755//
756// Exp2  Log2
757//
758
759def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
760  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
761def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
762  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
763def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
764  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
765
766def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
767  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
768def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
769  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
770def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
771  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
772
773//
774// Sin  Cos
775//
776
777def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
778  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
779def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
780  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
781
782def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
783  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
784def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
785  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
786
787//
788// Fma
789//
790
791def INT_NVVM_FMA_RN_FTZ_F
792  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
793    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
794def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
795  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
796def INT_NVVM_FMA_RZ_FTZ_F
797  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
798    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
799def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
800  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
801def INT_NVVM_FMA_RM_FTZ_F
802  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
803    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
804def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
805  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
806def INT_NVVM_FMA_RP_FTZ_F
807  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
808    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
809def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
810  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;
811
812def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
813  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
814def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
815  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
816def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
817  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
818def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
819  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;
820
821//
822// Rcp
823//
824
825def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
826  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
827def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
828  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
829def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
830  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
831def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
832  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
833def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
834  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
835def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
836  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
837def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
838  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
839def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
840  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
841
842def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
843  Float64Regs, int_nvvm_rcp_rn_d>;
844def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
845  Float64Regs, int_nvvm_rcp_rz_d>;
846def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
847  Float64Regs, int_nvvm_rcp_rm_d>;
848def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
849  Float64Regs, int_nvvm_rcp_rp_d>;
850
851def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
852  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
853
854//
855// Sqrt
856//
857
858def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
859  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
860def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
861  Float32Regs, int_nvvm_sqrt_rn_f>;
862def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
863  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
864def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
865  Float32Regs, int_nvvm_sqrt_rz_f>;
866def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
867  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
868def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
869  Float32Regs, int_nvvm_sqrt_rm_f>;
870def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
871  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
872def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
873  Float32Regs, int_nvvm_sqrt_rp_f>;
874def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
875  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
876def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
877  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
878
879def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
880  Float64Regs, int_nvvm_sqrt_rn_d>;
881def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
882  Float64Regs, int_nvvm_sqrt_rz_d>;
883def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
884  Float64Regs, int_nvvm_sqrt_rm_d>;
885def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
886  Float64Regs, int_nvvm_sqrt_rp_d>;
887
888// nvvm_sqrt intrinsic
889def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
890          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
891def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
892          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
893def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
894          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
895def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
896          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
897
898//
899// Rsqrt
900//
901
902def INT_NVVM_RSQRT_APPROX_FTZ_F
903  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
904    int_nvvm_rsqrt_approx_ftz_f>;
905def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
906  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
907def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
908  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
909
910//
911// Add
912//
913
914def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
915  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
916def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
917  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
918def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
919  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
920def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
921  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
922def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
923  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
924def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
925  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
926def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
927  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
928def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
929  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
930
931def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
932  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
933def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
934  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
935def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
936  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
937def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
938  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
939
940//
941// Convert
942//
943
944def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
945          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
946def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
947          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
948def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
949          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
950def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
951          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
952def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
953          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
954def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
955          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
956def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
957          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
958def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
959          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
960
961def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
962          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
963def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
964          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
965def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
966          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
967def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
968          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
969
970def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
971          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
972def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
973          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
974def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
975          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
976def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
977          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
978
979def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
980          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
981def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
982          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
983def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
984          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
985def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
986          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
987
988def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
989          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
990def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
991          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
992def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
993          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
994def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
995          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
996
997def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
998          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
999def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
1000          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
1001def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
1002          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1003def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
1004          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
1005def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
1006          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1007def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
1008          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
1009def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
1010          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1011def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
1012          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
1013
1014def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
1015          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1016def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
1017          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
1018def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
1019          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1020def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
1021          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
1022def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
1023          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1024def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
1025          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
1026def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
1027          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1028def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
1029          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1030
1031def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
1032          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
1033def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
1034          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
1035def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
1036          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
1037def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
1038          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
1039
1040def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
1041          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
1042def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
1043          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
1044def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
1045          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
1046def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
1047          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1048
1049def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
1050          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1051def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1052          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1053def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
1054          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1055def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1056          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1057
1058def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
1059          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
1060def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
1061          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
1062def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
1063          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
1064def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
1065          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
1066
1067def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
1068          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
1069def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
1070          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
1071def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
1072          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
1073def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
1074          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
1075
1076def CVT_tf32_f32 :
1077   NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
1078                   "cvt.rna.tf32.f32 \t$dest, $a;",
1079       [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;
1080
1081def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
1082  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
1083
1084def INT_NVVM_D2I_LO : F_MATH_1<
1085  !strconcat("{{\n\t",
1086             ".reg .b32 %temp; \n\t",
1087             "mov.b64 \t{$dst, %temp}, $src0;\n\t",
1088             "}}"),
1089  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
1090def INT_NVVM_D2I_HI : F_MATH_1<
1091  !strconcat("{{\n\t",
1092             ".reg .b32 %temp; \n\t",
1093             "mov.b64 \t{%temp, $dst}, $src0;\n\t",
1094             "}}"),
1095  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
1096
1097def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
1098          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1099def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
1100          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
1101def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
1102          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1103def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
1104          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
1105def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
1106          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1107def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
1108          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
1109def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
1110          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1111def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
1112          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
1113
1114def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
1115          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
1116def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
1117          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
1118def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
1119          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
1120def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
1121          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
1122def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
1123          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
1124def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
1125          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
1126def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
1127          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
1128def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
1129          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1130
1131def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
1132          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
1133def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
1134          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
1135def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
1136          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
1137def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
1138          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
1139
1140def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
1141          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
1142def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
1143          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
1144def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
1145          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
1146def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
1147          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1148
1149def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
1150          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
1151def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
1152          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
1153def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
1154          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
1155def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
1156          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
1157
1158def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
1159          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
1160def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
1161          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
1162def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
1163          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
1164def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
1165          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1166
1167def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
1168          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
1169def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
1170          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
1171def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
1172          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
1173def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
1174          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
1175
1176def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
1177          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
1178def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
1179          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
1180def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
1181          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
1182def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
1183          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1184
1185
1186def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
1187          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
1188def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
1189          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1190
1191//
1192// Bitcast
1193//
1194
1195def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
1196  Float32Regs, int_nvvm_bitcast_f2i>;
1197def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
1198  Int32Regs, int_nvvm_bitcast_i2f>;
1199
1200def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
1201  Int64Regs, int_nvvm_bitcast_ll2d>;
1202def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
1203  Float64Regs, int_nvvm_bitcast_d2ll>;
1204
1205//
1206// FNS
1207//
1208
1209class INT_FNS_MBO<dag ins, dag Operands>
1210  : NVPTXInst<(outs Int32Regs:$dst), ins,
1211               "fns.b32 \t$dst, $mask, $base, $offset;",
1212               [(set Int32Regs:$dst, Operands )]>,
1213    Requires<[hasPTX60, hasSM30]>;
1214
1215def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
1216                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1217def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
1218                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
1219def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
1220                     (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
1221def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
1222                     (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
1223def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
1224                     (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
1225def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
1226                     (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
1227def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
1228                     (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
1229def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
1230                     (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
1231
1232//-----------------------------------
1233// Atomic Functions
1234//-----------------------------------
1235
1236class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
1237 : PatFrag<ops, frag, AS_match.global>;
1238class ATOMIC_SHARED_CHK <dag ops, dag frag>
1239 : PatFrag<ops, frag, AS_match.shared>;
1240class ATOMIC_GENERIC_CHK <dag ops, dag frag>
1241 : PatFrag<ops, frag, AS_match.generic>;
1242
1243multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1244  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1245  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
1246  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1247    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
1248    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1249  Requires<Pred>;
1250  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
1251    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
1252    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
1253  Requires<Pred>;
1254}
1255multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1256  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
1257  list<Predicate> Pred = []> {
1258  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1259    IntOp, IMMType, IMM, Pred>;
1260  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1261    IntOp, IMMType, IMM, Pred>;
1262}
1263
1264// has 2 operands, neg the second one
1265multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1266  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1267  list<Predicate> Pred> {
1268  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
1269    !strconcat(
1270      "{{ \n\t",
1271      ".reg \t.s", TypeStr, " temp; \n\t",
1272      "neg.s", TypeStr, " \ttemp, $b; \n\t",
1273      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
1274      "}}"),
1275    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
1276  Requires<Pred>;
1277}
1278multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
1279  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
1280 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1281   IntOp, Pred> ;
1282 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1283   IntOp, Pred> ;
1284}
1285
1286// has 3 operands
1287multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
1288  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
1289  Operand IMMType, list<Predicate> Pred> {
1290  def reg : NVPTXInst<(outs regclass:$dst),
1291    (ins ptrclass:$addr, regclass:$b, regclass:$c),
1292    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1293    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
1294  Requires<Pred>;
1295
1296  def imm1 : NVPTXInst<(outs regclass:$dst),
1297    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
1298    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1299    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
1300  Requires<Pred>;
1301
1302  def imm2 : NVPTXInst<(outs regclass:$dst),
1303    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
1304    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
1305    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
1306  Requires<Pred>;
1307
1308  def imm3 : NVPTXInst<(outs regclass:$dst),
1309    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
1310    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
1311    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
1312  Requires<Pred>;
1313}
1314multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
1315  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
1316  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
1317    IntOp, IMMType, Pred>;
1318  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
1319    IntOp, IMMType, Pred>;
1320}
1321
1322// atom_add
1323
1324def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1325  (atomic_load_add_32 node:$a, node:$b)>;
1326def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1327  (atomic_load_add_32 node:$a, node:$b)>;
1328def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1329  (atomic_load_add_32 node:$a, node:$b)>;
1330def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1331  (atomic_load_add_64 node:$a, node:$b)>;
1332def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1333  (atomic_load_add_64 node:$a, node:$b)>;
1334def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1335  (atomic_load_add_64 node:$a, node:$b)>;
1336def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1337  (atomic_load_fadd node:$a, node:$b)>;
1338def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1339  (atomic_load_fadd node:$a, node:$b)>;
1340def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1341  (atomic_load_fadd node:$a, node:$b)>;
1342
1343defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
1344  atomic_load_add_32_g, i32imm, imm>;
1345defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
1346  atomic_load_add_32_s, i32imm, imm>;
1347defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
1348  atomic_load_add_32_gen, i32imm, imm>;
1349defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1350  ".add", atomic_load_add_32_gen, i32imm, imm>;
1351
1352defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
1353  atomic_load_add_64_g, i64imm, imm>;
1354defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
1355  atomic_load_add_64_s, i64imm, imm>;
1356defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
1357  atomic_load_add_64_gen, i64imm, imm>;
1358defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1359  ".add", atomic_load_add_64_gen, i64imm, imm>;
1360
1361defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
1362  atomic_load_add_g, f32imm, fpimm>;
1363defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
1364  atomic_load_add_s, f32imm, fpimm>;
1365defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
1366  atomic_load_add_gen, f32imm, fpimm>;
1367
1368defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1369  atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
1370defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1371  atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
1372defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1373  atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1374
1375// atom_sub
1376
1377def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1378  (atomic_load_sub_32 node:$a, node:$b)>;
1379def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1380  (atomic_load_sub_32 node:$a, node:$b)>;
1381def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1382  (atomic_load_sub_32 node:$a, node:$b)>;
1383def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1384  (atomic_load_sub_64 node:$a, node:$b)>;
1385def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1386  (atomic_load_sub_64 node:$a, node:$b)>;
1387def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1388  (atomic_load_sub_64 node:$a, node:$b)>;
1389
1390defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
1391  atomic_load_sub_32_g>;
1392defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
1393  atomic_load_sub_64_g>;
1394defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
1395  atomic_load_sub_32_gen>;
1396defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
1397  ".add", atomic_load_sub_32_gen>;
1398defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
1399  atomic_load_sub_32_s>;
1400defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
1401  atomic_load_sub_64_s>;
1402defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
1403  atomic_load_sub_64_gen>;
1404defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
1405  ".add", atomic_load_sub_64_gen>;
1406
1407// atom_swap
1408
1409def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1410  (atomic_swap_32 node:$a, node:$b)>;
1411def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1412  (atomic_swap_32 node:$a, node:$b)>;
1413def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1414  (atomic_swap_32 node:$a, node:$b)>;
1415def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1416  (atomic_swap_64 node:$a, node:$b)>;
1417def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1418  (atomic_swap_64 node:$a, node:$b)>;
1419def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1420  (atomic_swap_64 node:$a, node:$b)>;
1421
1422defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
1423  atomic_swap_32_g, i32imm, imm>;
1424defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
1425  atomic_swap_32_s, i32imm, imm>;
1426defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
1427  atomic_swap_32_gen, i32imm, imm>;
1428defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1429  ".exch", atomic_swap_32_gen, i32imm, imm>;
1430defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
1431  atomic_swap_64_g, i64imm, imm>;
1432defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
1433  atomic_swap_64_s, i64imm, imm>;
1434defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
1435  atomic_swap_64_gen, i64imm, imm>;
1436defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1437  ".exch", atomic_swap_64_gen, i64imm, imm>;
1438
1439// atom_max
1440
1441def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1442  , (atomic_load_max_32 node:$a, node:$b)>;
1443def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1444  (atomic_load_max_32 node:$a, node:$b)>;
1445def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1446  (atomic_load_max_32 node:$a, node:$b)>;
1447def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1448  , (atomic_load_max_64 node:$a, node:$b)>;
1449def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1450  (atomic_load_max_64 node:$a, node:$b)>;
1451def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1452  (atomic_load_max_64 node:$a, node:$b)>;
1453def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1454  (atomic_load_umax_32 node:$a, node:$b)>;
1455def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1456  (atomic_load_umax_32 node:$a, node:$b)>;
1457def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1458  (atomic_load_umax_32 node:$a, node:$b)>;
1459def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1460  (atomic_load_umax_64 node:$a, node:$b)>;
1461def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1462  (atomic_load_umax_64 node:$a, node:$b)>;
1463def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1464  (atomic_load_umax_64 node:$a, node:$b)>;
1465
1466defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1467  ".max", atomic_load_max_32_g, i32imm, imm>;
1468defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1469  ".max", atomic_load_max_32_s, i32imm, imm>;
1470defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
1471  atomic_load_max_32_gen, i32imm, imm>;
1472defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1473  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
1474defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1475  ".max", atomic_load_max_64_g, i64imm, imm>;
1476defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1477  ".max", atomic_load_max_64_s, i64imm, imm>;
1478defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
1479  atomic_load_max_64_gen, i64imm, imm>;
1480defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1481  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
1482defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1483  ".max", atomic_load_umax_32_g, i32imm, imm>;
1484defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1485  ".max", atomic_load_umax_32_s, i32imm, imm>;
1486defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
1487  atomic_load_umax_32_gen, i32imm, imm>;
1488defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1489  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
1490defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1491  ".max", atomic_load_umax_64_g, i64imm, imm>;
1492defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1493  ".max", atomic_load_umax_64_s, i64imm, imm>;
1494defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
1495  atomic_load_umax_64_gen, i64imm, imm>;
1496defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1497  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;
1498
1499// atom_min
1500
1501def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1502  (atomic_load_min_32 node:$a, node:$b)>;
1503def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1504  (atomic_load_min_32 node:$a, node:$b)>;
1505def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1506  (atomic_load_min_32 node:$a, node:$b)>;
1507def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1508  (atomic_load_min_64 node:$a, node:$b)>;
1509def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1510  (atomic_load_min_64 node:$a, node:$b)>;
1511def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1512  (atomic_load_min_64 node:$a, node:$b)>;
1513def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1514  (atomic_load_umin_32 node:$a, node:$b)>;
1515def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1516  (atomic_load_umin_32 node:$a, node:$b)>;
1517def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1518  (atomic_load_umin_32 node:$a, node:$b)>;
1519def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1520  (atomic_load_umin_64 node:$a, node:$b)>;
1521def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1522  (atomic_load_umin_64 node:$a, node:$b)>;
1523def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1524  (atomic_load_umin_64 node:$a, node:$b)>;
1525
1526defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
1527  ".min", atomic_load_min_32_g, i32imm, imm>;
1528defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
1529  ".min", atomic_load_min_32_s, i32imm, imm>;
1530defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
1531  atomic_load_min_32_gen, i32imm, imm>;
1532defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1533  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
1534defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
1535  ".min", atomic_load_min_64_g, i64imm, imm>;
1536defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
1537  ".min", atomic_load_min_64_s, i64imm, imm>;
1538defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
1539  atomic_load_min_64_gen, i64imm, imm>;
1540defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1541  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
1542defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1543  ".min", atomic_load_umin_32_g, i32imm, imm>;
1544defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
1545  ".min", atomic_load_umin_32_s, i32imm, imm>;
1546defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
1547  atomic_load_umin_32_gen, i32imm, imm>;
1548defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
1549  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
1550defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
1551  ".min", atomic_load_umin_64_g, i64imm, imm>;
1552defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
1553  ".min", atomic_load_umin_64_s, i64imm, imm>;
1554defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
1555  atomic_load_umin_64_gen, i64imm, imm>;
1556defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
1557  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;
1558
1559// atom_inc  atom_dec
1560
1561def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1562  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1563def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1564  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1565def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1566  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
1567def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1568  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1569def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1570  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1571def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1572  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
1573
1574defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
1575  atomic_load_inc_32_g, i32imm, imm>;
1576defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
1577  atomic_load_inc_32_s, i32imm, imm>;
1578defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
1579  atomic_load_inc_32_gen, i32imm, imm>;
1580defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1581  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
1582defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
1583  atomic_load_dec_32_g, i32imm, imm>;
1584defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
1585  atomic_load_dec_32_s, i32imm, imm>;
1586defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
1587  atomic_load_dec_32_gen, i32imm, imm>;
1588defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
1589  ".dec", atomic_load_dec_32_gen, i32imm, imm>;
1590
1591// atom_and
1592
1593def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1594  (atomic_load_and_32 node:$a, node:$b)>;
1595def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1596  (atomic_load_and_32 node:$a, node:$b)>;
1597def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1598  (atomic_load_and_32 node:$a, node:$b)>;
1599def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1600  (atomic_load_and_64 node:$a, node:$b)>;
1601def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1602  (atomic_load_and_64 node:$a, node:$b)>;
1603def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1604  (atomic_load_and_64 node:$a, node:$b)>;
1605
1606defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
1607  atomic_load_and_32_g, i32imm, imm>;
1608defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
1609  atomic_load_and_32_s, i32imm, imm>;
1610defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
1611  atomic_load_and_32_gen, i32imm, imm>;
1612defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1613  ".and", atomic_load_and_32_gen, i32imm, imm>;
1614defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
1615  atomic_load_and_64_g, i64imm, imm>;
1616defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
1617  atomic_load_and_64_s, i64imm, imm>;
1618defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
1619  atomic_load_and_64_gen, i64imm, imm>;
1620defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1621  ".and", atomic_load_and_64_gen, i64imm, imm>;
1622
1623// atom_or
1624
1625def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1626  (atomic_load_or_32 node:$a, node:$b)>;
1627def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1628  (atomic_load_or_32 node:$a, node:$b)>;
1629def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1630  (atomic_load_or_32 node:$a, node:$b)>;
1631def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1632  (atomic_load_or_64 node:$a, node:$b)>;
1633def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1634  (atomic_load_or_64 node:$a, node:$b)>;
1635def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1636  (atomic_load_or_64 node:$a, node:$b)>;
1637
1638defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
1639  atomic_load_or_32_g, i32imm, imm>;
1640defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
1641  atomic_load_or_32_gen, i32imm, imm>;
1642defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1643  ".or", atomic_load_or_32_gen, i32imm, imm>;
1644defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
1645  atomic_load_or_32_s, i32imm, imm>;
1646defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
1647  atomic_load_or_64_g, i64imm, imm>;
1648defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
1649  atomic_load_or_64_gen, i64imm, imm>;
1650defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1651  ".or", atomic_load_or_64_gen, i64imm, imm>;
1652defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
1653  atomic_load_or_64_s, i64imm, imm>;
1654
1655// atom_xor
1656
1657def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1658  (atomic_load_xor_32 node:$a, node:$b)>;
1659def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1660  (atomic_load_xor_32 node:$a, node:$b)>;
1661def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1662  (atomic_load_xor_32 node:$a, node:$b)>;
1663def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1664  (atomic_load_xor_64 node:$a, node:$b)>;
1665def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1666  (atomic_load_xor_64 node:$a, node:$b)>;
1667def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1668  (atomic_load_xor_64 node:$a, node:$b)>;
1669
1670defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
1671  atomic_load_xor_32_g, i32imm, imm>;
1672defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
1673  atomic_load_xor_32_s, i32imm, imm>;
1674defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
1675  atomic_load_xor_32_gen, i32imm, imm>;
1676defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
1677  ".xor", atomic_load_xor_32_gen, i32imm, imm>;
1678defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
1679  atomic_load_xor_64_g, i64imm, imm>;
1680defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
1681  atomic_load_xor_64_s, i64imm, imm>;
1682defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
1683  atomic_load_xor_64_gen, i64imm, imm>;
1684defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
1685  ".xor", atomic_load_xor_64_gen, i64imm, imm>;
1686
1687// atom_cas
1688
1689def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1690  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1691def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1692  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1693def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1694  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
1695def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
1696  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1697def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
1698  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1699def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
1700  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
1701
1702defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
1703  atomic_cmp_swap_32_g, i32imm>;
1704defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
1705  atomic_cmp_swap_32_s, i32imm>;
1706defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
1707  atomic_cmp_swap_32_gen, i32imm>;
1708defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
1709  ".cas", atomic_cmp_swap_32_gen, i32imm>;
1710defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
1711  atomic_cmp_swap_64_g, i64imm>;
1712defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
1713  atomic_cmp_swap_64_s, i64imm>;
1714defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
1715  atomic_cmp_swap_64_gen, i64imm>;
1716defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
1717  ".cas", atomic_cmp_swap_64_gen, i64imm>;
1718
1719// Support for scoped atomic operations.  Matches
1720// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1721// and converts it into the appropriate instruction.
1722// NOTE: not all possible combinations are implemented
1723//  'space' is limited to generic as it's the only one needed to support CUDA.
1724//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
1725class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
1726                  dag ins, dag Operands>
1727      : NVPTXInst<(outs regclass:$result), ins,
1728                  AsmStr,
1729                  [(set regclass:$result, Operands)]>,
1730        Requires<Preds>;
1731
1732// Define instruction variants for all addressing modes.
1733multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
1734                       NVPTXRegClass regclass, Operand ImmType,
1735                       SDNode Imm, ValueType ImmTy,
1736                       list<Predicate> Preds> {
1737  let AddedComplexity = 1 in {
1738    def : ATOM23_impl<AsmStr, regclass, Preds,
1739                      (ins Int32Regs:$src, regclass:$b),
1740                      (Intr Int32Regs:$src, regclass:$b)>;
1741    def : ATOM23_impl<AsmStr, regclass, Preds,
1742                      (ins Int64Regs:$src, regclass:$b),
1743                      (Intr Int64Regs:$src, regclass:$b)>;
1744  }
1745  // tablegen can't infer argument types from Intrinsic (though it can
1746  // from Instruction) so we have to enforce specific type on
1747  // immediates via explicit cast to ImmTy.
1748  def : ATOM23_impl<AsmStr, regclass, Preds,
1749                    (ins Int32Regs:$src, ImmType:$b),
1750                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
1751  def : ATOM23_impl<AsmStr, regclass, Preds,
1752                    (ins Int64Regs:$src, ImmType:$b),
1753                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
1754}
1755
1756multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
1757                       NVPTXRegClass regclass, Operand ImmType,
1758                       SDNode Imm, ValueType ImmTy,
1759                       list<Predicate> Preds> {
1760  // Variants for register/immediate permutations of $b and $c
1761  let AddedComplexity = 2 in {
1762    def : ATOM23_impl<AsmStr, regclass, Preds,
1763                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
1764                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
1765    def : ATOM23_impl<AsmStr, regclass, Preds,
1766                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
1767                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
1768  }
1769  let AddedComplexity = 1 in {
1770    def : ATOM23_impl<AsmStr, regclass, Preds,
1771                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
1772                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1773    def : ATOM23_impl<AsmStr, regclass, Preds,
1774                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
1775                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
1776    def : ATOM23_impl<AsmStr, regclass, Preds,
1777                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
1778                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1779    def : ATOM23_impl<AsmStr, regclass, Preds,
1780                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
1781                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
1782  }
1783  def : ATOM23_impl<AsmStr, regclass, Preds,
1784                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
1785                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1786  def : ATOM23_impl<AsmStr, regclass, Preds,
1787                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
1788                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
1789}
1790
1791// Constructs instrinsic name and instruction asm strings.
1792multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
1793                       string ScopeStr, string SpaceStr,
1794                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1795                       ValueType ImmTy, list<Predicate> Preds> {
1796  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1797                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1798                            # "." # OpStr # "." # TypeStr
1799                            # " \t$result, [$src], $b;",
1800                     !cast<Intrinsic>(
1801                            "int_nvvm_atomic_" # OpStr
1802                            # "_" # SpaceStr # "_" # IntTypeStr
1803                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1804                     regclass, ImmType, Imm, ImmTy, Preds>;
1805}
1806multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
1807                       string ScopeStr, string SpaceStr,
1808                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1809                       ValueType ImmTy, list<Predicate> Preds> {
1810  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
1811                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
1812                            # "." # OpStr # "." # TypeStr
1813                            # " \t$result, [$src], $b, $c;",
1814                     !cast<Intrinsic>(
1815                            "int_nvvm_atomic_" # OpStr
1816                            # "_" # SpaceStr # "_" # IntTypeStr
1817                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
1818                     regclass, ImmType, Imm, ImmTy, Preds>;
1819}
1820
1821// Constructs variants for different address spaces.
1822// For now we only need variants for generic space pointers.
1823multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
1824                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1825                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1826   defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1827                            regclass, ImmType, Imm, ImmTy, Preds>;
1828}
1829multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
1830                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
1831                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
1832   defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
1833                            regclass, ImmType, Imm, ImmTy, Preds>;
1834}
1835
1836// Constructs variants for different scopes of atomic op.
1837multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
1838                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
1839                       ValueType ImmTy, list<Predicate> Preds> {
1840   // .gpu scope is default and is currently covered by existing
1841   // atomics w/o explicitly specified scope.
1842   defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1843                           regclass, ImmType, Imm, ImmTy,
1844                           !listconcat(Preds,[hasAtomScope])>;
1845   defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1846                           regclass, ImmType, Imm, ImmTy,
1847                           !listconcat(Preds,[hasAtomScope])>;
1848}
1849multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
1850           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
1851           list<Predicate> Preds> {
1852   // No need to define ".gpu"-scoped atomics.  They do the same thing
1853   // as the regular, non-scoped atomics defined elsewhere.
1854   defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
1855                           regclass, ImmType, Imm, ImmTy,
1856                           !listconcat(Preds,[hasAtomScope])>;
1857   defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
1858                           regclass, ImmType, Imm, ImmTy,
1859                           !listconcat(Preds,[hasAtomScope])>;
1860}
1861
1862// atom.add
1863multiclass ATOM2_add_impl<string OpStr> {
1864   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1865   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1866   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
1867   defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
1868                            []>;
1869   defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
1870                            [hasAtomAddF64]>;
1871}
1872
1873// atom.{and,or,xor}
1874multiclass ATOM2_bitwise_impl<string OpStr> {
1875   defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1876   defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
1877                            [hasAtomBitwise64]>;
1878}
1879
1880// atom.exch
1881multiclass ATOM2_exch_impl<string OpStr> {
1882   defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1883   defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1884}
1885
1886// atom.{min,max}
1887multiclass ATOM2_minmax_impl<string OpStr> {
1888   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
1889   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1890   defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
1891                            [hasAtomMinMax64]>;
1892   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
1893                            [hasAtomMinMax64]>;
1894}
1895
1896// atom.{inc,dec}
1897multiclass ATOM2_incdec_impl<string OpStr> {
1898   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
1899}
1900
1901// atom.cas
1902multiclass ATOM3_cas_impl<string OpStr> {
1903   defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
1904   defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
1905}
1906
1907defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
1908defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
1909defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
1910defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
1911defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
1912defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
1913defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
1914defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
1915defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
1916defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
1917
1918//-----------------------------------
1919// Support for ldu on sm_20 or later
1920//-----------------------------------
1921
1922// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1923// read-only in a kernel.
1924
1925// Scalar
1926
1927multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
1928  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
1929               !strconcat("ldu.global.", TyStr),
1930                      []>, Requires<[hasLDU]>;
1931  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
1932               !strconcat("ldu.global.", TyStr),
1933                        []>, Requires<[hasLDU]>;
1934 def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
1935               !strconcat("ldu.global.", TyStr),
1936                      []>, Requires<[hasLDU]>;
1937 def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
1938               !strconcat("ldu.global.", TyStr),
1939                      []>, Requires<[hasLDU]>;
1940 def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
1941               !strconcat("ldu.global.", TyStr),
1942                        []>, Requires<[hasLDU]>;
1943}
1944
1945defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
1946defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
1947defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1948defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1949defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
1950defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
1951defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
1952defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
1953defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
1954defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1955
1956// vector
1957
1958// Elementized vector ldu
1959multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
1960 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1961                     (ins Int32Regs:$src),
1962                     !strconcat("ldu.global.", TyStr), []>;
1963 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1964                     (ins Int64Regs:$src),
1965                     !strconcat("ldu.global.", TyStr), []>;
1966 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1967                     (ins MEMri:$src),
1968                     !strconcat("ldu.global.", TyStr), []>;
1969 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1970                     (ins MEMri64:$src),
1971                     !strconcat("ldu.global.", TyStr), []>;
1972 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
1973                     (ins imemAny:$src),
1974                     !strconcat("ldu.global.", TyStr), []>;
1975}
1976
1977multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
1978 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1979                            regclass:$dst4), (ins Int32Regs:$src),
1980               !strconcat("ldu.global.", TyStr), []>;
1981 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1982                            regclass:$dst4), (ins Int64Regs:$src),
1983               !strconcat("ldu.global.", TyStr), []>;
1984 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1985                            regclass:$dst4), (ins MEMri:$src),
1986               !strconcat("ldu.global.", TyStr), []>;
1987 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1988                            regclass:$dst4), (ins MEMri64:$src),
1989               !strconcat("ldu.global.", TyStr), []>;
1990 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
1991                            regclass:$dst4), (ins imemAny:$src),
1992               !strconcat("ldu.global.", TyStr), []>;
1993}
1994
1995defm INT_PTX_LDU_G_v2i8_ELE
1996  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
1997defm INT_PTX_LDU_G_v2i16_ELE
1998  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
1999defm INT_PTX_LDU_G_v2i32_ELE
2000  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2001defm INT_PTX_LDU_G_v2f16_ELE
2002  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
2003defm INT_PTX_LDU_G_v2f16x2_ELE
2004  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
2005defm INT_PTX_LDU_G_v2f32_ELE
2006  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2007defm INT_PTX_LDU_G_v2i64_ELE
2008  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2009defm INT_PTX_LDU_G_v2f64_ELE
2010  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2011defm INT_PTX_LDU_G_v4i8_ELE
2012  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2013defm INT_PTX_LDU_G_v4i16_ELE
2014  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2015    Int16Regs>;
2016defm INT_PTX_LDU_G_v4i32_ELE
2017  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2018    Int32Regs>;
2019defm INT_PTX_LDU_G_v4f16_ELE
2020  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2021    Float16Regs>;
2022defm INT_PTX_LDU_G_v4f16x2_ELE
2023  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2024    Float16x2Regs>;
2025defm INT_PTX_LDU_G_v4f32_ELE
2026  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
2027    Float32Regs>;
2028
2029
2030//-----------------------------------
2031// Support for ldg on sm_35 or later
2032//-----------------------------------
2033
2034// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2035// non-coherent texture cache, and therefore the values read must be read-only
2036// during the lifetime of the kernel.
2037
2038multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
2039  def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
2040               !strconcat("ld.global.nc.", TyStr),
2041                      []>, Requires<[hasLDG]>;
2042  def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
2043               !strconcat("ld.global.nc.", TyStr),
2044                        []>, Requires<[hasLDG]>;
2045 def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
2046               !strconcat("ld.global.nc.", TyStr),
2047                      []>, Requires<[hasLDG]>;
2048 def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
2049               !strconcat("ld.global.nc.", TyStr),
2050                      []>, Requires<[hasLDG]>;
2051 def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
2052               !strconcat("ld.global.nc.", TyStr),
2053                        []>, Requires<[hasLDG]>;
2054}
2055
2056defm INT_PTX_LDG_GLOBAL_i8
2057  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
2058defm INT_PTX_LDG_GLOBAL_i16
2059  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
2060defm INT_PTX_LDG_GLOBAL_i32
2061  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2062defm INT_PTX_LDG_GLOBAL_i64
2063  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2064defm INT_PTX_LDG_GLOBAL_f16
2065  : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
2066defm INT_PTX_LDG_GLOBAL_f16x2
2067  : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
2068defm INT_PTX_LDG_GLOBAL_f32
2069  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
2070defm INT_PTX_LDG_GLOBAL_f64
2071  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
2072defm INT_PTX_LDG_GLOBAL_p32
2073  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
2074defm INT_PTX_LDG_GLOBAL_p64
2075  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2076
2077// vector
2078
2079// Elementized vector ldg
2080multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
2081 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2082                     (ins Int32Regs:$src),
2083                     !strconcat("ld.global.nc.", TyStr), []>;
2084 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2085                     (ins Int64Regs:$src),
2086                     !strconcat("ld.global.nc.", TyStr), []>;
2087 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2088                     (ins MEMri:$src),
2089                     !strconcat("ld.global.nc.", TyStr), []>;
2090 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2091                     (ins MEMri64:$src),
2092                     !strconcat("ld.global.nc.", TyStr), []>;
2093 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2094                     (ins imemAny:$src),
2095                     !strconcat("ld.global.nc.", TyStr), []>;
2096}
2097
2098multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
2099  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2100                              regclass:$dst4), (ins Int32Regs:$src),
2101               !strconcat("ld.global.nc.", TyStr), []>;
2102  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2103                               regclass:$dst4), (ins Int64Regs:$src),
2104               !strconcat("ld.global.nc.", TyStr), []>;
2105  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2106                              regclass:$dst4), (ins MEMri:$src),
2107               !strconcat("ld.global.nc.", TyStr), []>;
2108  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2109                              regclass:$dst4), (ins MEMri64:$src),
2110               !strconcat("ld.global.nc.", TyStr), []>;
2111  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
2112                             regclass:$dst4), (ins imemAny:$src),
2113               !strconcat("ld.global.nc.", TyStr), []>;
2114}
2115
2116// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2117defm INT_PTX_LDG_G_v2i8_ELE
2118  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",  Int16Regs>;
2119defm INT_PTX_LDG_G_v2i16_ELE
2120  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
2121defm INT_PTX_LDG_G_v2i32_ELE
2122  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
2123defm INT_PTX_LDG_G_v2f16_ELE
2124  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
2125defm INT_PTX_LDG_G_v2f16x2_ELE
2126  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
2127defm INT_PTX_LDG_G_v2f32_ELE
2128  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
2129defm INT_PTX_LDG_G_v2i64_ELE
2130  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
2131defm INT_PTX_LDG_G_v2f64_ELE
2132  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
2133defm INT_PTX_LDG_G_v4i8_ELE
2134  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2135defm INT_PTX_LDG_G_v4i16_ELE
2136  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
2137defm INT_PTX_LDG_G_v4i32_ELE
2138  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
2139defm INT_PTX_LDG_G_v4f16_ELE
2140  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
2141defm INT_PTX_LDG_G_v4f16x2_ELE
2142  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
2143defm INT_PTX_LDG_G_v4f32_ELE
2144  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
2145
2146
2147multiclass NG_TO_G<string Str, Intrinsic Intrin> {
2148   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2149          !strconcat("cvta.", Str, ".u32 \t$result, $src;"),
2150      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2151   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2152          !strconcat("cvta.", Str, ".u64 \t$result, $src;"),
2153      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2154   def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
2155          "{{ .reg .b64 %tmp;\n\t"
2156          #"  cvt.u64.u32 \t%tmp, $src;\n\t"
2157          #"  cvta." # Str # ".u64 \t$result, %tmp; }}",
2158      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
2159      Requires<[useShortPtr]>;
2160}
2161
2162multiclass G_TO_NG<string Str, Intrinsic Intrin> {
2163   def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
2164          !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"),
2165      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
2166   def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
2167          !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"),
2168      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
2169   def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
2170          "{{ .reg .b64 %tmp;\n\t"
2171          #"  cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
2172          #"  cvt.u32.u64 \t$result, %tmp; }}",
2173      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
2174      Requires<[useShortPtr]>;
2175}
2176
2177defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
2178defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
2179defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
2180defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;
2181
2182defm cvta_to_local   : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
2183defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
2184defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
2185defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2186
2187
2188// nvvm.ptr.gen.to.param
2189def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result),
2190  (ins Int32Regs:$src),
2191                        "mov.u32 \t$result, $src;",
2192                              [(set Int32Regs:$result,
2193                                (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
2194def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result),
2195  (ins Int64Regs:$src),
2196                        "mov.u64 \t$result, $src;",
2197                              [(set Int64Regs:$result,
2198                                (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2199
2200
2201// nvvm.move intrinsicc
2202def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
2203                             "mov.b16 \t$r, $s;",
2204                             [(set Int16Regs:$r,
2205                               (int_nvvm_move_i16 Int16Regs:$s))]>;
2206def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2207                             "mov.b32 \t$r, $s;",
2208                             [(set Int32Regs:$r,
2209                               (int_nvvm_move_i32 Int32Regs:$s))]>;
2210def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2211                             "mov.b64 \t$r, $s;",
2212                             [(set Int64Regs:$r,
2213                               (int_nvvm_move_i64 Int64Regs:$s))]>;
2214def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
2215                             "mov.f32 \t$r, $s;",
2216                             [(set Float32Regs:$r,
2217                               (int_nvvm_move_float Float32Regs:$s))]>;
2218def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
2219                             "mov.f64 \t$r, $s;",
2220                             [(set Float64Regs:$r,
2221                               (int_nvvm_move_double Float64Regs:$s))]>;
2222def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
2223                             "mov.u32 \t$r, $s;",
2224                             [(set Int32Regs:$r,
2225                               (int_nvvm_move_ptr Int32Regs:$s))]>;
2226def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
2227                             "mov.u64 \t$r, $s;",
2228                             [(set Int64Regs:$r,
2229                               (int_nvvm_move_ptr Int64Regs:$s))]>;
2230
2231// @TODO: Are these actually needed, or will we always just see symbols
2232// copied to registers first?
2233/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2234                             "mov.u32 \t$r, $s;",
2235                             [(set Int32Regs:$r,
2236                             (int_nvvm_move_ptr texternalsym:$s))]>;
2237def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2238                             "mov.u64 \t$r, $s;",
2239                             [(set Int64Regs:$r,
2240                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2241
2242
2243// MoveParam        %r1, param
2244// ptr_local_to_gen %r2, %r1
2245// ptr_gen_to_local %r3, %r2
2246// ->
2247// mov %r1, param
2248
2249// @TODO: Revisit this.  There is a type
2250// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2251// instructions are not currently defined. However, we can use the ptr
2252// variants and the asm printer will do the right thing.
2253def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2254                (MoveParam texternalsym:$src)))),
2255               (nvvm_move_ptr64  texternalsym:$src)>;
2256def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
2257                (MoveParam texternalsym:$src)))),
2258               (nvvm_move_ptr32  texternalsym:$src)>;
2259
2260def texsurf_handles
2261  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
2262              "mov.u64 \t$result, $src;", []>;
2263
2264//-----------------------------------
2265// Compiler Error Warn
2266// - Just ignore them in codegen
2267//-----------------------------------
2268
2269def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2270                "// llvm.nvvm.compiler.warn()",
2271                [(int_nvvm_compiler_warn Int32Regs:$a)]>;
2272def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2273                "// llvm.nvvm.compiler.warn()",
2274                [(int_nvvm_compiler_warn Int64Regs:$a)]>;
2275def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
2276                "// llvm.nvvm.compiler.error()",
2277                [(int_nvvm_compiler_error Int32Regs:$a)]>;
2278def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
2279                "// llvm.nvvm.compiler.error()",
2280                [(int_nvvm_compiler_error Int64Regs:$a)]>;
2281
2282
2283// isspacep
2284
2285def ISSPACEP_CONST_32
2286  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2287              "isspacep.const \t$d, $a;",
2288              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
2289    Requires<[hasPTX31]>;
2290def ISSPACEP_CONST_64
2291  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2292              "isspacep.const \t$d, $a;",
2293              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
2294    Requires<[hasPTX31]>;
2295def ISSPACEP_GLOBAL_32
2296  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2297              "isspacep.global \t$d, $a;",
2298              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
2299def ISSPACEP_GLOBAL_64
2300  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2301              "isspacep.global \t$d, $a;",
2302              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
2303def ISSPACEP_LOCAL_32
2304  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2305              "isspacep.local \t$d, $a;",
2306              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
2307def ISSPACEP_LOCAL_64
2308  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2309              "isspacep.local \t$d, $a;",
2310              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
2311def ISSPACEP_SHARED_32
2312  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2313              "isspacep.shared \t$d, $a;",
2314              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
2315def ISSPACEP_SHARED_64
2316  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2317              "isspacep.shared \t$d, $a;",
2318              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2319
2320
2321// Special register reads
2322def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
2323                            (ins SpecialRegs:$r),
2324                            "mov.b32 \t$d, $r;", []>;
2325
2326def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
2327def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
2328def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
2329def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
2330def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
2331def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
2332def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
2333def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
2334def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
2335def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
2336def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
2337def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
2338def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
2339def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
2340def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
2341def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
2342def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
2343def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
2344def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
2345def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
2346def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
2347def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
2348def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
2349def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
2350def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
2351def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
2352def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
2353def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
2354def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
2355def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
2356def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
2357def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2358
2359
2360// rotate builtin support
2361
2362def ROTATE_B32_HW_IMM
2363  : NVPTXInst<(outs Int32Regs:$dst),
2364              (ins  Int32Regs:$src, i32imm:$amt),
2365              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2366              [(set Int32Regs:$dst,
2367                 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
2368              Requires<[hasHWROT32]> ;
2369
2370def ROTATE_B32_HW_REG
2371  : NVPTXInst<(outs Int32Regs:$dst),
2372              (ins  Int32Regs:$src, Int32Regs:$amt),
2373              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
2374              [(set Int32Regs:$dst,
2375                 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
2376              Requires<[hasHWROT32]> ;
2377
2378def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
2379          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2380      Requires<[noHWROT32]> ;
2381
2382def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
2383          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
2384      Requires<[noHWROT32]> ;
2385
2386let hasSideEffects = false in {
2387  def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2388    !strconcat("{{\n\t",
2389               ".reg .b32 %dummy;\n\t",
2390               "mov.b64 \t{$dst,%dummy}, $src;\n\t",
2391               "}}"),
2392          []> ;
2393
2394  def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
2395    !strconcat("{{\n\t",
2396               ".reg .b32 %dummy;\n\t",
2397               "mov.b64 \t{%dummy,$dst}, $src;\n\t",
2398               "}}"),
2399          []> ;
2400}
2401
2402let hasSideEffects = false in {
2403  def PACK_TWO_INT32
2404    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
2405                "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
2406}
2407
2408def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
2409          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
2410                          (GET_LO_INT64 Int64Regs:$src))> ;
2411
2412// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2413// no side effects.
2414let hasSideEffects = false in {
2415  def SHF_L_WRAP_B32_IMM
2416    : NVPTXInst<(outs Int32Regs:$dst),
2417                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2418                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2419      Requires<[hasHWROT32]>;
2420
2421  def SHF_L_WRAP_B32_REG
2422    : NVPTXInst<(outs Int32Regs:$dst),
2423                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2424                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2425      Requires<[hasHWROT32]>;
2426
2427  def SHF_R_WRAP_B32_IMM
2428    : NVPTXInst<(outs Int32Regs:$dst),
2429                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
2430                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2431      Requires<[hasHWROT32]>;
2432
2433  def SHF_R_WRAP_B32_REG
2434    : NVPTXInst<(outs Int32Regs:$dst),
2435                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
2436                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
2437      Requires<[hasHWROT32]>;
2438}
2439
2440// HW version of rotate 64
2441def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2442          (PACK_TWO_INT32
2443            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2444                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
2445            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2446                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
2447      Requires<[hasHWROT32]>;
2448
2449def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2450          (PACK_TWO_INT32
2451            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2452                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
2453            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2454                               (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2455      Requires<[hasHWROT32]>;
2456
2457
2458def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2459          (PACK_TWO_INT32
2460            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
2461                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
2462            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
2463                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
2464      Requires<[hasHWROT32]>;
2465
2466def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2467          (PACK_TWO_INT32
2468            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
2469                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
2470            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
2471                               (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
2472      Requires<[hasHWROT32]>;
2473
2474// SW version of rotate 64
2475def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
2476          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
2477      Requires<[noHWROT32]>;
2478def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
2479          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2480      Requires<[noHWROT32]>;
2481def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
2482          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
2483      Requires<[noHWROT32]>;
2484def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
2485          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
2486      Requires<[noHWROT32]>;
2487
2488
2489//-----------------------------------
2490// Texture Intrinsics
2491//-----------------------------------
2492
2493// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2494// also defined in NVPTXReplaceImageHandles.cpp
2495
2496// texmode_independent
2497let IsTex = true, IsTexModeUnified = false in {
2498// Texture fetch instructions using handles
2499
2500class TEX_1D_base<string inst, NVPTXRegClass outtype,
2501                  NVPTXRegClass intype, dag texsamp>
2502    : NVPTXInst<(outs outtype:$r, outtype:$g,
2503                      outtype:$b, outtype:$a),
2504                 !con(texsamp, (ins intype:$x)),
2505                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
2506                 []>;
2507
2508multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2509  def _RR : TEX_1D_base<inst, outtype, intype,
2510                        (ins Int64Regs:$t, Int64Regs:$s)>;
2511  def _RI : TEX_1D_base<inst, outtype, intype,
2512                        (ins Int64Regs:$t, i64imm:$s)>;
2513  def _IR : TEX_1D_base<inst, outtype, intype,
2514                        (ins i64imm:$t, Int64Regs:$s)>;
2515  def _II : TEX_1D_base<inst, outtype, intype,
2516                        (ins i64imm:$t, i64imm:$s)>;
2517}
2518
2519defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2520defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2521defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2522defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2523defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2524defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2525
2526class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2527                        NVPTXRegClass intype, dag texsamp>
2528    : NVPTXInst<(outs outtype:$r, outtype:$g,
2529                      outtype:$b, outtype:$a),
2530                 !con(texsamp, (ins intype:$x, intype:$lod)),
2531                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
2532                 []>;
2533
2534multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2535                        NVPTXRegClass intype> {
2536  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2537                              (ins Int64Regs:$t, Int64Regs:$s)>;
2538  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2539                              (ins Int64Regs:$t, i64imm:$s)>;
2540  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2541                              (ins i64imm:$t, Int64Regs:$s)>;
2542  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2543                              (ins i64imm:$t, i64imm:$s)>;
2544}
2545
2546defm TEX_1D_F32_F32_LEVEL :
2547  TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2548defm TEX_1D_S32_F32_LEVEL :
2549  TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2550defm TEX_1D_U32_F32_LEVEL :
2551  TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2552
2553class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2554                       NVPTXRegClass intype, dag texsamp>
2555    : NVPTXInst<(outs outtype:$r, outtype:$g,
2556                      outtype:$b, outtype:$a),
2557                 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2558                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2559                        " \\{$gradx\\}, \\{$grady\\};",
2560                 []>;
2561
2562multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2563                       NVPTXRegClass intype> {
2564  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2565                             (ins Int64Regs:$t, Int64Regs:$s)>;
2566  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2567                             (ins Int64Regs:$t, i64imm:$s)>;
2568  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2569                             (ins i64imm:$t, Int64Regs:$s)>;
2570  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2571                             (ins i64imm:$t, i64imm:$s)>;
2572}
2573
2574defm TEX_1D_F32_F32_GRAD
2575  : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2576defm TEX_1D_S32_F32_GRAD
2577  : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2578defm TEX_1D_U32_F32_GRAD
2579  : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2580
2581class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2582                        NVPTXRegClass intype, dag texsamp>
2583    : NVPTXInst<(outs outtype:$r, outtype:$g,
2584                      outtype:$b, outtype:$a),
2585                 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2586                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
2587                 []>;
2588
2589multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2590                        NVPTXRegClass intype> {
2591  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2592                              (ins Int64Regs:$t, Int64Regs:$s)>;
2593  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2594                              (ins Int64Regs:$t, i64imm:$s)>;
2595  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2596                              (ins i64imm:$t, Int64Regs:$s)>;
2597  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2598                              (ins i64imm:$t, i64imm:$s)>;
2599}
2600
2601defm TEX_1D_ARRAY_F32_F32
2602  : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2603defm TEX_1D_ARRAY_F32_S32
2604  : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2605defm TEX_1D_ARRAY_S32_S32
2606  : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2607defm TEX_1D_ARRAY_S32_F32
2608  : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2609defm TEX_1D_ARRAY_U32_S32
2610  : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2611defm TEX_1D_ARRAY_U32_F32
2612  : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2613
2614class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2615                              NVPTXRegClass intype, dag texsamp>
2616    : NVPTXInst<(outs outtype:$r, outtype:$g,
2617                      outtype:$b, outtype:$a),
2618                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2619                 inst # " \t\\{$r, $g, $b, $a\\},"
2620                        " [$t, $s, \\{$l, $x\\}], $lod;",
2621                 []>;
2622
2623multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2624                              NVPTXRegClass intype> {
2625  def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2626                                    (ins Int64Regs:$t, Int64Regs:$s)>;
2627  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2628                                    (ins Int64Regs:$t, i64imm:$s)>;
2629  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2630                                    (ins i64imm:$t, Int64Regs:$s)>;
2631  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2632                                    (ins i64imm:$t, i64imm:$s)>;
2633}
2634
2635defm TEX_1D_ARRAY_F32_F32_LEVEL
2636  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2637defm TEX_1D_ARRAY_S32_F32_LEVEL
2638  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2639defm TEX_1D_ARRAY_U32_F32_LEVEL
2640  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2641
2642class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2643                             NVPTXRegClass intype, dag texsamp>
2644    : NVPTXInst<(outs outtype:$r, outtype:$g,
2645                      outtype:$b, outtype:$a),
2646                 !con(texsamp, (ins Int32Regs:$l, intype:$x,
2647                                    intype:$gradx, intype:$grady)),
2648                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2649                        " \\{$gradx\\}, \\{$grady\\};",
2650                 []>;
2651
2652multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2653                             NVPTXRegClass intype> {
2654  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2655                                   (ins Int64Regs:$t, Int64Regs:$s)>;
2656  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2657                                   (ins Int64Regs:$t, i64imm:$s)>;
2658  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2659                                   (ins i64imm:$t, Int64Regs:$s)>;
2660  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2661                                   (ins i64imm:$t, i64imm:$s)>;
2662}
2663
2664defm TEX_1D_ARRAY_F32_F32_GRAD
2665  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2666defm TEX_1D_ARRAY_S32_F32_GRAD
2667  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2668defm TEX_1D_ARRAY_U32_F32_GRAD
2669  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2670
2671class TEX_2D_base<string inst, NVPTXRegClass outtype,
2672                  NVPTXRegClass intype, dag texsamp>
2673    : NVPTXInst<(outs outtype:$r, outtype:$g,
2674                      outtype:$b, outtype:$a),
2675                 !con(texsamp, (ins intype:$x, intype:$y)),
2676                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
2677                 []>;
2678
2679multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2680  def _RR : TEX_2D_base<inst, outtype, intype,
2681                        (ins Int64Regs:$t, Int64Regs:$s)>;
2682  def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2683  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2684  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
2685}
2686
2687defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2688defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2689defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2690defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2691defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2692defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2693
2694class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2695                        NVPTXRegClass intype, dag texsamp>
2696    : NVPTXInst<(outs outtype:$r, outtype:$g,
2697                      outtype:$b, outtype:$a),
2698                 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2699                 inst # " \t\\{$r, $g, $b, $a\\},"
2700                        " [$t, $s, \\{$x, $y\\}], $lod;",
2701                 []>;
2702
2703multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2704                        NVPTXRegClass intype> {
2705  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2706                              (ins Int64Regs:$t, Int64Regs:$s)>;
2707  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2708                              (ins Int64Regs:$t, i64imm:$s)>;
2709  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2710                              (ins i64imm:$t, Int64Regs:$s)>;
2711  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2712                              (ins i64imm:$t, i64imm:$s)>;
2713}
2714
2715defm TEX_2D_F32_F32_LEVEL :
2716  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2717defm TEX_2D_S32_F32_LEVEL :
2718  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2719defm TEX_2D_U32_F32_LEVEL :
2720  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2721
2722class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
2723                       NVPTXRegClass intype, dag texsamp>
2724    : NVPTXInst<(outs outtype:$r, outtype:$g,
2725                      outtype:$b, outtype:$a),
2726                 !con(texsamp, (ins intype:$x, intype:$y,
2727                                    intype:$gradx0, intype:$gradx1,
2728                                    intype:$grady0, intype:$grady1)),
2729                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
2730                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
2731                 []>;
2732
2733multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
2734                       NVPTXRegClass intype> {
2735  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
2736                              (ins Int64Regs:$t, Int64Regs:$s)>;
2737  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
2738                              (ins Int64Regs:$t, i64imm:$s)>;
2739  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
2740                              (ins i64imm:$t, Int64Regs:$s)>;
2741  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
2742                              (ins i64imm:$t, i64imm:$s)>;
2743}
2744
2745defm TEX_2D_F32_F32_GRAD :
2746  TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2747defm TEX_2D_S32_F32_GRAD :
2748  TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2749defm TEX_2D_U32_F32_GRAD :
2750  TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2751
2752class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
2753                        NVPTXRegClass intype, dag texsamp>
2754    : NVPTXInst<(outs outtype:$r, outtype:$g,
2755                      outtype:$b, outtype:$a),
2756                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
2757                 inst # " \t\\{$r, $g, $b, $a\\},"
2758                        " [$t, $s, \\{$l, $x, $y, $y\\}];",
2759                 []>;
2760
2761multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
2762                        NVPTXRegClass intype> {
2763  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
2764                              (ins Int64Regs:$t, Int64Regs:$s)>;
2765  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
2766                              (ins Int64Regs:$t, i64imm:$s)>;
2767  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
2768                              (ins i64imm:$t, Int64Regs:$s)>;
2769  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
2770                              (ins i64imm:$t, i64imm:$s)>;
2771}
2772
2773defm TEX_2D_ARRAY_F32_F32
2774  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2775defm TEX_2D_ARRAY_F32_S32
2776  : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
2777defm TEX_2D_ARRAY_S32_S32
2778  : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
2779defm TEX_2D_ARRAY_S32_F32
2780  : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2781defm TEX_2D_ARRAY_U32_S32
2782  : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
2783defm TEX_2D_ARRAY_U32_F32
2784  : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2785
2786class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2787                              NVPTXRegClass intype, dag texsamp>
2788    : NVPTXInst<(outs outtype:$r, outtype:$g,
2789                      outtype:$b, outtype:$a),
2790                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
2791                                    intype:$lod)),
2792                 inst # " \t\\{$r, $g, $b, $a\\},"
2793                        " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
2794                 []>;
2795
2796multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2797                              NVPTXRegClass intype> {
2798  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2799                              (ins Int64Regs:$t, Int64Regs:$s)>;
2800  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2801                              (ins Int64Regs:$t, i64imm:$s)>;
2802  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2803                              (ins i64imm:$t, Int64Regs:$s)>;
2804  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
2805                              (ins i64imm:$t, i64imm:$s)>;
2806}
2807
2808defm TEX_2D_ARRAY_F32_F32_LEVEL
2809  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2810defm TEX_2D_ARRAY_S32_F32_LEVEL
2811  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2812defm TEX_2D_ARRAY_U32_F32_LEVEL
2813  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2814
2815class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2816                             NVPTXRegClass intype, dag texsamp>
2817    : NVPTXInst<(outs outtype:$r, outtype:$g,
2818                      outtype:$b, outtype:$a),
2819                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
2820                                    intype:$gradx0, intype:$gradx1,
2821                                    intype:$grady0, intype:$grady1)),
2822                 inst # " \t\\{$r, $g, $b, $a\\},"
2823                        " [$t, $s, \\{$l, $x, $y, $y\\}],"
2824                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
2825                 []>;
2826
2827multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2828                             NVPTXRegClass intype> {
2829  def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
2830                              (ins Int64Regs:$t, Int64Regs:$s)>;
2831  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
2832                              (ins Int64Regs:$t, i64imm:$s)>;
2833  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
2834                              (ins i64imm:$t, Int64Regs:$s)>;
2835  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
2836                              (ins i64imm:$t, i64imm:$s)>;
2837}
2838
2839defm TEX_2D_ARRAY_F32_F32_GRAD
2840  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
2841defm TEX_2D_ARRAY_S32_F32_GRAD
2842  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
2843defm TEX_2D_ARRAY_U32_F32_GRAD
2844  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2845
2846class TEX_3D_base<string inst, NVPTXRegClass outtype,
2847                  NVPTXRegClass intype, dag texsamp>
2848    : NVPTXInst<(outs outtype:$r, outtype:$g,
2849                      outtype:$b, outtype:$a),
2850                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
2851                 inst # " \t\\{$r, $g, $b, $a\\},"
2852                        " [$t, $s, \\{$x, $y, $z, $z\\}];",
2853                 []>;
2854
2855multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2856  def _RR : TEX_3D_base<inst, outtype, intype,
2857                              (ins Int64Regs:$t, Int64Regs:$s)>;
2858  def _RI : TEX_3D_base<inst, outtype, intype,
2859                              (ins Int64Regs:$t, i64imm:$s)>;
2860  def _IR : TEX_3D_base<inst, outtype, intype,
2861                              (ins i64imm:$t, Int64Regs:$s)>;
2862  def _II : TEX_3D_base<inst, outtype, intype,
2863                              (ins i64imm:$t, i64imm:$s)>;
2864}
2865
2866defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
2867defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
2868defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
2869defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
2870defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
2871defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2872
2873class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
2874                        NVPTXRegClass intype, dag texsamp>
2875    : NVPTXInst<(outs outtype:$r, outtype:$g,
2876                      outtype:$b, outtype:$a),
2877                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
2878                                    intype:$lod)),
2879                 inst # " \t\\{$r, $g, $b, $a\\},"
2880                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2881                 []>;
2882
2883multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
2884                        NVPTXRegClass intype> {
2885  def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
2886                              (ins Int64Regs:$t, Int64Regs:$s)>;
2887  def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
2888                              (ins Int64Regs:$t, i64imm:$s)>;
2889  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
2890                              (ins i64imm:$t, Int64Regs:$s)>;
2891  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
2892                              (ins i64imm:$t, i64imm:$s)>;
2893}
2894
2895defm TEX_3D_F32_F32_LEVEL
2896  : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
2897defm TEX_3D_S32_F32_LEVEL
2898  : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
2899defm TEX_3D_U32_F32_LEVEL
2900  : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2901
2902class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
2903                       NVPTXRegClass intype, dag texsamp>
2904    : NVPTXInst<(outs outtype:$r, outtype:$g,
2905                      outtype:$b, outtype:$a),
2906                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
2907                                    intype :$gradx0, intype:$gradx1,
2908                                    intype:$gradx2, intype:$grady0,
2909                                    intype:$grady1, intype:$grady2)),
2910                 inst # " \t\\{$r, $g, $b, $a\\},"
2911                        " [$t, $s, \\{$x, $y, $z, $z\\}],"
2912                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
2913                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
2914                 []>;
2915
2916multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
2917                       NVPTXRegClass intype> {
2918  def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
2919                             (ins Int64Regs:$t, Int64Regs:$s)>;
2920  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
2921                             (ins Int64Regs:$t, i64imm:$s)>;
2922  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
2923                             (ins i64imm:$t, Int64Regs:$s)>;
2924  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
2925                             (ins i64imm:$t, i64imm:$s)>;
2926}
2927
2928defm TEX_3D_F32_F32_GRAD
2929  : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
2930defm TEX_3D_S32_F32_GRAD
2931  : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
2932defm TEX_3D_U32_F32_GRAD
2933  : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2934
2935class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
2936                    NVPTXRegClass intype, dag texsamp>
2937    : NVPTXInst<(outs outtype:$r, outtype:$g,
2938                      outtype:$b, outtype:$a),
2939                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
2940                 inst # " \t\\{$r, $g, $b, $a\\},"
2941                        " [$t, $s, \\{$x, $y, $z, $z\\}];",
2942                 []>;
2943
2944multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2945  def _RR : TEX_CUBE_base<inst, outtype, intype,
2946                          (ins Int64Regs:$t, Int64Regs:$s)>;
2947  def _RI : TEX_CUBE_base<inst, outtype, intype,
2948                          (ins Int64Regs:$t, i64imm:$s)>;
2949  def _IR : TEX_CUBE_base<inst, outtype, intype,
2950                          (ins i64imm:$t, Int64Regs:$s)>;
2951  def _II : TEX_CUBE_base<inst, outtype, intype,
2952                          (ins i64imm:$t, i64imm:$s)>;
2953}
2954
2955defm TEX_CUBE_F32_F32
2956  : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
2957defm TEX_CUBE_S32_F32
2958  : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
2959defm TEX_CUBE_U32_F32
2960  : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
2961
2962class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
2963                          NVPTXRegClass intype, dag texsamp>
2964    : NVPTXInst<(outs outtype:$r, outtype:$g,
2965                      outtype:$b, outtype:$a),
2966                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
2967                                    intype:$lod)),
2968                 inst # " \t\\{$r, $g, $b, $a\\},"
2969                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
2970                 []>;
2971
2972multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
2973                          NVPTXRegClass intype> {
2974  def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
2975                                (ins Int64Regs:$t, Int64Regs:$s)>;
2976  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
2977                                (ins Int64Regs:$t, i64imm:$s)>;
2978  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
2979                                (ins i64imm:$t, Int64Regs:$s)>;
2980  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
2981                                (ins i64imm:$t, i64imm:$s)>;
2982}
2983
2984defm TEX_CUBE_F32_F32_LEVEL
2985  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
2986defm TEX_CUBE_S32_F32_LEVEL
2987  : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
2988defm TEX_CUBE_U32_F32_LEVEL
2989  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
2990
2991class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
2992                          NVPTXRegClass intype, dag texsamp>
2993    : NVPTXInst<(outs outtype:$r, outtype:$g,
2994                      outtype:$b, outtype:$a),
2995                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
2996                                    intype:$z)),
2997                 inst # " \t\\{$r, $g, $b, $a\\},"
2998                        " [$t, $s, \\{$l, $x, $y, $z\\}];",
2999                 []>;
3000
3001multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3002                          NVPTXRegClass intype> {
3003  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3004                                (ins Int64Regs:$t, Int64Regs:$s)>;
3005  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3006                                (ins Int64Regs:$t, i64imm:$s)>;
3007  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3008                                (ins i64imm:$t, Int64Regs:$s)>;
3009  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
3010                                (ins i64imm:$t, i64imm:$s)>;
3011}
3012
3013defm TEX_CUBE_ARRAY_F32_F32
3014  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3015defm TEX_CUBE_ARRAY_S32_F32
3016  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3017defm TEX_CUBE_ARRAY_U32_F32
3018  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3019
3020class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3021                                NVPTXRegClass intype, dag texsamp>
3022    : NVPTXInst<(outs outtype:$r, outtype:$g,
3023                      outtype:$b, outtype:$a),
3024                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
3025                                    intype:$z, intype:$lod)),
3026                 inst # " \t\\{$r, $g, $b, $a\\},"
3027                        " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
3028                 []>;
3029
3030multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3031                                NVPTXRegClass intype> {
3032  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3033                                      (ins Int64Regs:$t, Int64Regs:$s)>;
3034  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3035                                      (ins Int64Regs:$t, i64imm:$s)>;
3036  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3037                                      (ins i64imm:$t, Int64Regs:$s)>;
3038  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3039                                      (ins i64imm:$t, i64imm:$s)>;
3040}
3041
3042defm TEX_CUBE_ARRAY_F32_F32_LEVEL
3043  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3044                         Float32Regs, Float32Regs>;
3045defm TEX_CUBE_ARRAY_S32_F32_LEVEL
3046  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3047                         Int32Regs, Float32Regs>;
3048defm TEX_CUBE_ARRAY_U32_F32_LEVEL
3049  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3050                         Int32Regs, Float32Regs>;
3051
3052class TLD4_2D_base<string inst, NVPTXRegClass outtype,
3053                   NVPTXRegClass intype, dag texsamp>
3054    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3055                      outtype:$v2, outtype:$v3),
3056                 !con(texsamp, (ins intype:$x, intype:$y)),
3057                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
3058                 []>;
3059
3060multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
3061  def _RR : TLD4_2D_base<inst, outtype, intype,
3062                         (ins Int64Regs:$t, Int64Regs:$s)>;
3063  def _RI : TLD4_2D_base<inst, outtype, intype,
3064                         (ins Int64Regs:$t, i64imm:$s)>;
3065  def _IR : TLD4_2D_base<inst, outtype, intype,
3066                         (ins i64imm:$t, Int64Regs:$s)>;
3067  def _II : TLD4_2D_base<inst, outtype, intype,
3068                         (ins i64imm:$t, i64imm:$s)>;
3069}
3070
3071defm TLD4_R_2D_F32_F32
3072  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3073defm TLD4_G_2D_F32_F32
3074  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3075defm TLD4_B_2D_F32_F32
3076  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3077defm TLD4_A_2D_F32_F32
3078  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3079
3080defm TLD4_R_2D_S32_F32
3081  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3082defm TLD4_G_2D_S32_F32
3083  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3084defm TLD4_B_2D_S32_F32
3085  : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3086defm TLD4_A_2D_S32_F32
3087  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3088
3089defm TLD4_R_2D_U32_F32
3090  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3091defm TLD4_G_2D_U32_F32
3092  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3093defm TLD4_B_2D_U32_F32
3094  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3095defm TLD4_A_2D_U32_F32
3096  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3097
3098}
3099
3100
3101// texmode_unified
3102let IsTex = true, IsTexModeUnified = true in {
3103// Texture fetch instructions using handles
3104
3105class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
3106                          NVPTXRegClass intype, dag tex>
3107    : NVPTXInst<(outs outtype:$r, outtype:$g,
3108                      outtype:$b, outtype:$a),
3109                 !con(tex, (ins intype:$x)),
3110                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
3111                 []>;
3112
3113multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
3114                          NVPTXRegClass intype> {
3115  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3116  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
3117}
3118
3119defm TEX_UNIFIED_1D_F32_S32
3120  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
3121defm TEX_UNIFIED_1D_F32_F32
3122  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3123defm TEX_UNIFIED_1D_S32_S32
3124  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
3125defm TEX_UNIFIED_1D_S32_F32
3126  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3127defm TEX_UNIFIED_1D_U32_S32
3128  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
3129defm TEX_UNIFIED_1D_U32_F32
3130  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3131
3132class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
3133                                NVPTXRegClass intype, dag tex>
3134    : NVPTXInst<(outs outtype:$r, outtype:$g,
3135                      outtype:$b, outtype:$a),
3136                 !con(tex, (ins intype:$x, intype:$lod)),
3137                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
3138                 []>;
3139
3140multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
3141                                NVPTXRegClass intype> {
3142  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3143  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3144}
3145
3146defm TEX_UNIFIED_1D_F32_F32_LEVEL
3147  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3148defm TEX_UNIFIED_1D_S32_F32_LEVEL
3149  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3150defm TEX_UNIFIED_1D_U32_F32_LEVEL
3151  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3152
3153class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
3154                               NVPTXRegClass intype, dag tex>
3155    : NVPTXInst<(outs outtype:$r, outtype:$g,
3156                      outtype:$b, outtype:$a),
3157                 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
3158                 inst # " \t\\{$r, $g, $b, $a\\},"
3159                        " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
3160                 []>;
3161
3162multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3163                               NVPTXRegClass intype> {
3164  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3165  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3166}
3167
3168defm TEX_UNIFIED_1D_F32_F32_GRAD
3169  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3170defm TEX_UNIFIED_1D_S32_F32_GRAD
3171  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3172defm TEX_UNIFIED_1D_U32_F32_GRAD
3173  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3174
3175class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3176                                NVPTXRegClass intype, dag tex>
3177    : NVPTXInst<(outs outtype:$r, outtype:$g,
3178                      outtype:$b, outtype:$a),
3179                 !con(tex, (ins Int32Regs:$l, intype:$x)),
3180                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
3181                 []>;
3182
3183multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3184                                NVPTXRegClass intype> {
3185  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3186  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3187}
3188
3189defm TEX_UNIFIED_1D_ARRAY_F32_S32
3190  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3191defm TEX_UNIFIED_1D_ARRAY_F32_F32
3192  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3193defm TEX_UNIFIED_1D_ARRAY_S32_S32
3194  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3195defm TEX_UNIFIED_1D_ARRAY_S32_F32
3196  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3197defm TEX_UNIFIED_1D_ARRAY_U32_S32
3198  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3199defm TEX_UNIFIED_1D_ARRAY_U32_F32
3200  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3201
3202class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3203                                      NVPTXRegClass intype, dag tex>
3204    : NVPTXInst<(outs outtype:$r, outtype:$g,
3205                      outtype:$b, outtype:$a),
3206                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3207                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
3208                 []>;
3209
3210multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3211                                      NVPTXRegClass intype> {
3212  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3213                                           (ins Int64Regs:$t)>;
3214  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3215                                           (ins i64imm:$t)>;
3216}
3217
3218defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3219  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3220                               Float32Regs, Float32Regs>;
3221defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3222  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3223                               Int32Regs, Float32Regs>;
3224defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3225  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3226                               Int32Regs, Float32Regs>;
3227
3228class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3229                                     NVPTXRegClass intype, dag tex>
3230    : NVPTXInst<(outs outtype:$r, outtype:$g,
3231                      outtype:$b, outtype:$a),
3232                 !con(tex, (ins Int32Regs:$l, intype:$x,
3233                                intype:$gradx, intype:$grady)),
3234                 inst # " \t\\{$r, $g, $b, $a\\},"
3235                        "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
3236                 []>;
3237
3238multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3239                                     NVPTXRegClass intype> {
3240  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3241                                          (ins Int64Regs:$t)>;
3242  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3243                                          (ins i64imm:$t)>;
3244}
3245
3246defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3247  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3248                              Float32Regs, Float32Regs>;
3249defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3250  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3251                              Int32Regs, Float32Regs>;
3252defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3253  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3254                              Int32Regs, Float32Regs>;
3255
3256class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3257                          NVPTXRegClass intype, dag tex>
3258    : NVPTXInst<(outs outtype:$r, outtype:$g,
3259                      outtype:$b, outtype:$a),
3260                 !con(tex, (ins intype:$x, intype:$y)),
3261                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
3262                 []>;
3263
3264multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3265                          NVPTXRegClass intype> {
3266  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3267  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3268}
3269
3270defm TEX_UNIFIED_2D_F32_S32
3271  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3272defm TEX_UNIFIED_2D_F32_F32
3273  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3274defm TEX_UNIFIED_2D_S32_S32
3275  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3276defm TEX_UNIFIED_2D_S32_F32
3277  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3278defm TEX_UNIFIED_2D_U32_S32
3279  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3280defm TEX_UNIFIED_2D_U32_F32
3281  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3282
3283class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3284                                NVPTXRegClass intype, dag tex>
3285    : NVPTXInst<(outs outtype:$r, outtype:$g,
3286                      outtype:$b, outtype:$a),
3287                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3288                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
3289                 []>;
3290
3291multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3292                                NVPTXRegClass intype> {
3293  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3294  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3295}
3296
3297defm TEX_UNIFIED_2D_F32_F32_LEVEL
3298  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3299defm TEX_UNIFIED_2D_S32_F32_LEVEL
3300  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3301defm TEX_UNIFIED_2D_U32_F32_LEVEL
3302  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3303
3304class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3305                               NVPTXRegClass intype, dag tex>
3306    : NVPTXInst<(outs outtype:$r, outtype:$g,
3307                      outtype:$b, outtype:$a),
3308                 !con(tex, (ins intype:$x, intype:$y,
3309                                intype:$gradx0, intype:$gradx1,
3310                                intype:$grady0, intype:$grady1)),
3311                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3312                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3313                 []>;
3314multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3315                               NVPTXRegClass intype> {
3316  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3317  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3318}
3319
3320defm TEX_UNIFIED_2D_F32_F32_GRAD
3321  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3322defm TEX_UNIFIED_2D_S32_F32_GRAD
3323  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3324defm TEX_UNIFIED_2D_U32_F32_GRAD
3325  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3326
3327class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3328                                NVPTXRegClass intype, dag tex>
3329    : NVPTXInst<(outs outtype:$r, outtype:$g,
3330                      outtype:$b, outtype:$a),
3331                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3332                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
3333                 []>;
3334multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3335                                NVPTXRegClass intype> {
3336  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3337  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3338}
3339
3340defm TEX_UNIFIED_2D_ARRAY_F32_S32
3341  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3342defm TEX_UNIFIED_2D_ARRAY_F32_F32
3343  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3344defm TEX_UNIFIED_2D_ARRAY_S32_S32
3345  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3346defm TEX_UNIFIED_2D_ARRAY_S32_F32
3347  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3348defm TEX_UNIFIED_2D_ARRAY_U32_S32
3349  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3350defm TEX_UNIFIED_2D_ARRAY_U32_F32
3351  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3352
3353class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3354                                      NVPTXRegClass intype, dag tex>
3355    : NVPTXInst<(outs outtype:$r, outtype:$g,
3356                      outtype:$b, outtype:$a),
3357                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3358                                intype:$lod)),
3359                 inst # " \t\\{$r, $g, $b, $a\\},"
3360                        "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
3361                 []>;
3362multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3363                                      NVPTXRegClass intype> {
3364  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3365                                           (ins Int64Regs:$t)>;
3366  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3367                                           (ins i64imm:$t)>;
3368}
3369
3370defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3371  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3372                               Float32Regs, Float32Regs>;
3373defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3374  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3375                               Int32Regs, Float32Regs>;
3376defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3377  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3378                               Int32Regs, Float32Regs>;
3379
3380class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3381                                     NVPTXRegClass intype, dag tex>
3382    : NVPTXInst<(outs outtype:$r, outtype:$g,
3383                      outtype:$b, outtype:$a),
3384                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3385                                intype:$gradx0, intype:$gradx1,
3386                                intype:$grady0, intype:$grady1)),
3387                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3388                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3389                 []>;
3390multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3391                                     NVPTXRegClass intype> {
3392  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3393                                          (ins Int64Regs:$t)>;
3394  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3395                                          (ins i64imm:$t)>;
3396}
3397
3398defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3399  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3400                              Float32Regs, Float32Regs>;
3401defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3402  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3403                              Int32Regs, Float32Regs>;
3404defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3405  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3406                              Int32Regs, Float32Regs>;
3407
3408class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3409                          NVPTXRegClass intype, dag tex>
3410    : NVPTXInst<(outs outtype:$r, outtype:$g,
3411                      outtype:$b, outtype:$a),
3412                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3413                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3414                 []>;
3415multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3416                          NVPTXRegClass intype> {
3417  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3418  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
3419}
3420
3421defm TEX_UNIFIED_3D_F32_S32
3422  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3423defm TEX_UNIFIED_3D_F32_F32
3424  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3425defm TEX_UNIFIED_3D_S32_S32
3426  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3427defm TEX_UNIFIED_3D_S32_F32
3428  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3429defm TEX_UNIFIED_3D_U32_S32
3430  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3431defm TEX_UNIFIED_3D_U32_F32
3432  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3433
3434class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3435                                NVPTXRegClass intype, dag tex>
3436    : NVPTXInst<(outs outtype:$r, outtype:$g,
3437                      outtype:$b, outtype:$a),
3438                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3439                 inst # " \t\\{$r, $g, $b, $a\\},"
3440                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3441                 []>;
3442multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3443                                NVPTXRegClass intype> {
3444  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3445  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3446}
3447
3448defm TEX_UNIFIED_3D_F32_F32_LEVEL
3449  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3450defm TEX_UNIFIED_3D_S32_F32_LEVEL
3451  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3452defm TEX_UNIFIED_3D_U32_F32_LEVEL
3453  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3454
3455class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3456                               NVPTXRegClass intype, dag tex>
3457    : NVPTXInst<(outs outtype:$r, outtype:$g,
3458                      outtype:$b, outtype:$a),
3459                 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3460                                intype:$gradx0, intype:$gradx1,
3461                                intype:$gradx2, intype:$grady0,
3462                                intype:$grady1, intype:$grady2)),
3463                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
3464                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
3465                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
3466                 []>;
3467multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3468                               NVPTXRegClass intype> {
3469  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3470  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3471}
3472
3473defm TEX_UNIFIED_3D_F32_F32_GRAD
3474  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3475defm TEX_UNIFIED_3D_S32_F32_GRAD
3476  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3477defm TEX_UNIFIED_3D_U32_F32_GRAD
3478  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3479
3480class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3481                            NVPTXRegClass intype, dag tex>
3482    : NVPTXInst<(outs outtype:$r, outtype:$g,
3483                      outtype:$b, outtype:$a),
3484                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3485                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3486                 []>;
3487multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3488                            NVPTXRegClass intype> {
3489  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3490  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
3491}
3492
3493defm TEX_UNIFIED_CUBE_F32_F32
3494  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3495defm TEX_UNIFIED_CUBE_S32_F32
3496  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3497defm TEX_UNIFIED_CUBE_U32_F32
3498  : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3499
3500class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3501                                  NVPTXRegClass intype, dag tex>
3502    : NVPTXInst<(outs outtype:$r, outtype:$g,
3503                      outtype:$b, outtype:$a),
3504                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3505                 inst # " \t\\{$r, $g, $b, $a\\},"
3506                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3507                 []>;
3508multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3509                                  NVPTXRegClass intype> {
3510  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3511                                       (ins Int64Regs:$t)>;
3512  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3513                                       (ins i64imm:$t)>;
3514}
3515
3516defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3517  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3518                           Float32Regs, Float32Regs>;
3519defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3520  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3521                           Int32Regs, Float32Regs>;
3522defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3523  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3524                           Int32Regs, Float32Regs>;
3525
3526class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3527                                  NVPTXRegClass intype, dag tex>
3528    : NVPTXInst<(outs outtype:$r, outtype:$g,
3529                      outtype:$b, outtype:$a),
3530                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3531                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
3532                 []>;
3533multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3534                                  NVPTXRegClass intype> {
3535  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3536                                       (ins Int64Regs:$t)>;
3537  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3538                                       (ins i64imm:$t)>;
3539}
3540
3541defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3542  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3543defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3544  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3545defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3546  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3547
3548class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3549                                        NVPTXRegClass intype, dag tex>
3550    : NVPTXInst<(outs outtype:$r, outtype:$g,
3551                      outtype:$b, outtype:$a),
3552                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3553                                intype:$lod)),
3554                 inst # " \t\\{$r, $g, $b, $a\\},"
3555                        " [$t, \\{$l, $x, $y, $z\\}], $lod;",
3556                 []>;
3557multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3558                                        NVPTXRegClass intype> {
3559  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3560                                             (ins Int64Regs:$t)>;
3561  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3562                                             (ins i64imm:$t)>;
3563}
3564
3565defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3566  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3567                                 Float32Regs, Float32Regs>;
3568defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3569  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3570                                 Int32Regs, Float32Regs>;
3571defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3572  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3573                                 Int32Regs, Float32Regs>;
3574
3575class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3576                           NVPTXRegClass intype, dag tex>
3577    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3578                      outtype:$v2, outtype:$v3),
3579                 !con(tex, (ins intype:$x, intype:$y)),
3580                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
3581                 []>;
3582multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3583                           NVPTXRegClass intype> {
3584  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3585  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3586}
3587
3588defm TLD4_UNIFIED_R_2D_F32_F32
3589  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3590defm TLD4_UNIFIED_G_2D_F32_F32
3591  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3592defm TLD4_UNIFIED_B_2D_F32_F32
3593  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3594defm TLD4_UNIFIED_A_2D_F32_F32
3595  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3596
3597defm TLD4_UNIFIED_R_2D_S32_F32
3598  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3599defm TLD4_UNIFIED_G_2D_S32_F32
3600  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3601defm TLD4_UNIFIED_B_2D_S32_F32
3602  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3603defm TLD4_UNIFIED_A_2D_S32_F32
3604  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3605
3606defm TLD4_UNIFIED_R_2D_U32_F32
3607  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3608defm TLD4_UNIFIED_G_2D_U32_F32
3609  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3610defm TLD4_UNIFIED_B_2D_U32_F32
3611  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3612defm TLD4_UNIFIED_A_2D_U32_F32
3613  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3614
3615}
3616
3617
3618
3619//=== Surface load instructions
3620
3621let IsSuld = true in {
3622
3623class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3624    : NVPTXInst<(outs outtype:$r),
3625                !con(surf, (ins Int32Regs:$x)),
3626                inst # " \\{$r\\}, [$s, \\{$x\\}];",
3627                []>;
3628multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
3629  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3630  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
3631}
3632
3633defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3634defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3635defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3636defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
3637
3638defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3639defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3640defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3641defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
3642
3643defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3644defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3645defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3646defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
3647
3648class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3649    : NVPTXInst<(outs outtype:$r),
3650                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3651                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
3652                []>;
3653multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3654  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3655  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
3656}
3657
3658defm SULD_1D_ARRAY_I8_CLAMP
3659  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3660defm SULD_1D_ARRAY_I16_CLAMP
3661  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3662defm SULD_1D_ARRAY_I32_CLAMP
3663  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
3664defm SULD_1D_ARRAY_I64_CLAMP
3665  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
3666
3667defm SULD_1D_ARRAY_I8_TRAP
3668  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
3669defm SULD_1D_ARRAY_I16_TRAP
3670  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
3671defm SULD_1D_ARRAY_I32_TRAP
3672  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
3673defm SULD_1D_ARRAY_I64_TRAP
3674  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
3675
3676defm SULD_1D_ARRAY_I8_ZERO
3677  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
3678defm SULD_1D_ARRAY_I16_ZERO
3679  : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
3680defm SULD_1D_ARRAY_I32_ZERO
3681  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
3682defm SULD_1D_ARRAY_I64_ZERO
3683  : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
3684
3685class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
3686    : NVPTXInst<(outs outtype:$r),
3687                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3688                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
3689                []>;
3690multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
3691  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
3692  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
3693}
3694
3695defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
3696defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
3697defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
3698defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
3699
3700defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
3701defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
3702defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
3703defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
3704
3705defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
3706defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
3707defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
3708defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
3709
3710class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3711    : NVPTXInst<(outs outtype:$r),
3712                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3713                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
3714                []>;
3715multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
3716  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3717  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
3718}
3719
3720defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
3721defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
3722defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
3723defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
3724
3725defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
3726defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
3727defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
3728defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
3729
3730defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
3731defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
3732defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
3733defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
3734
3735class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
3736    : NVPTXInst<(outs outtype:$r),
3737                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
3738                inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
3739                []>;
3740multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
3741  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
3742  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
3743}
3744
3745defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
3746defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
3747defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
3748defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
3749
3750defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
3751defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
3752defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
3753defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
3754
3755defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
3756defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
3757defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
3758defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
3759}
3760
3761let IsSuld = 2 in {
3762
3763class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3764    : NVPTXInst<(outs outtype:$r, outtype:$g),
3765                !con(surf, (ins Int32Regs:$x)),
3766                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
3767                []>;
3768multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
3769  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3770  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
3771}
3772
3773defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
3774defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
3775defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
3776defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
3777
3778defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
3779defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
3780defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
3781defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
3782
3783defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
3784defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
3785defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
3786defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
3787
3788class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3789    : NVPTXInst<(outs outtype:$r, outtype:$g),
3790                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3791                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
3792                []>;
3793multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
3794  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3795  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
3796}
3797
3798defm SULD_1D_ARRAY_V2I8_CLAMP
3799  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
3800defm SULD_1D_ARRAY_V2I16_CLAMP
3801  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
3802defm SULD_1D_ARRAY_V2I32_CLAMP
3803  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
3804defm SULD_1D_ARRAY_V2I64_CLAMP
3805  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
3806
3807defm SULD_1D_ARRAY_V2I8_TRAP
3808  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
3809defm SULD_1D_ARRAY_V2I16_TRAP
3810  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
3811defm SULD_1D_ARRAY_V2I32_TRAP
3812  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
3813defm SULD_1D_ARRAY_V2I64_TRAP
3814  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
3815
3816defm SULD_1D_ARRAY_V2I8_ZERO
3817  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
3818defm SULD_1D_ARRAY_V2I16_ZERO
3819  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
3820defm SULD_1D_ARRAY_V2I32_ZERO
3821  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
3822defm SULD_1D_ARRAY_V2I64_ZERO
3823  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
3824
3825class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3826    : NVPTXInst<(outs outtype:$r, outtype:$g),
3827                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3828                inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
3829                []>;
3830multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
3831  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3832  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
3833}
3834
3835defm SULD_2D_V2I8_CLAMP
3836  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
3837defm SULD_2D_V2I16_CLAMP
3838  : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
3839defm SULD_2D_V2I32_CLAMP
3840  : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
3841defm SULD_2D_V2I64_CLAMP
3842  : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
3843
3844defm SULD_2D_V2I8_TRAP
3845  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
3846defm SULD_2D_V2I16_TRAP
3847  : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
3848defm SULD_2D_V2I32_TRAP
3849  : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
3850defm SULD_2D_V2I64_TRAP
3851  : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
3852
3853defm SULD_2D_V2I8_ZERO
3854  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
3855defm SULD_2D_V2I16_ZERO
3856  : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
3857defm SULD_2D_V2I32_ZERO
3858  : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
3859defm SULD_2D_V2I64_ZERO
3860  : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
3861
3862class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3863    : NVPTXInst<(outs outtype:$r, outtype:$g),
3864                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3865                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
3866                []>;
3867multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
3868  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3869  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
3870}
3871
3872defm SULD_2D_ARRAY_V2I8_CLAMP
3873  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
3874defm SULD_2D_ARRAY_V2I16_CLAMP
3875  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
3876defm SULD_2D_ARRAY_V2I32_CLAMP
3877  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
3878defm SULD_2D_ARRAY_V2I64_CLAMP
3879  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
3880
3881defm SULD_2D_ARRAY_V2I8_TRAP
3882  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
3883defm SULD_2D_ARRAY_V2I16_TRAP
3884  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
3885defm SULD_2D_ARRAY_V2I32_TRAP
3886  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
3887defm SULD_2D_ARRAY_V2I64_TRAP
3888  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
3889
3890defm SULD_2D_ARRAY_V2I8_ZERO
3891  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
3892defm SULD_2D_ARRAY_V2I16_ZERO
3893  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
3894defm SULD_2D_ARRAY_V2I32_ZERO
3895  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
3896defm SULD_2D_ARRAY_V2I64_ZERO
3897  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
3898
3899class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3900    : NVPTXInst<(outs outtype:$r, outtype:$g),
3901                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
3902                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3903                []>;
3904multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
3905  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3906  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
3907}
3908
3909defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
3910defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
3911defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
3912defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
3913
3914defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
3915defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
3916defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
3917defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
3918
3919defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
3920defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
3921defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
3922defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
3923
3924}
3925
3926let IsSuld = 3 in {
3927
3928class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3929    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3930                !con(surf, (ins Int32Regs:$x)),
3931                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3932                []>;
3933multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
3934  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3935  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
3936}
3937
3938defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
3939defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
3940defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
3941
3942defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
3943defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
3944defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
3945
3946defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
3947defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
3948defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
3949
3950class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3951    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3952                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3953                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
3954                []>;
3955multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
3956  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3957  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
3958}
3959
3960defm SULD_1D_ARRAY_V4I8_CLAMP
3961  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
3962defm SULD_1D_ARRAY_V4I16_CLAMP
3963  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
3964defm SULD_1D_ARRAY_V4I32_CLAMP
3965  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
3966
3967defm SULD_1D_ARRAY_V4I8_TRAP
3968  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
3969defm SULD_1D_ARRAY_V4I16_TRAP
3970  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
3971defm SULD_1D_ARRAY_V4I32_TRAP
3972  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
3973
3974defm SULD_1D_ARRAY_V4I8_ZERO
3975  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
3976defm SULD_1D_ARRAY_V4I16_ZERO
3977  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
3978defm SULD_1D_ARRAY_V4I32_ZERO
3979  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
3980
3981class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3982    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3983                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3984                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3985                []>;
3986multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
3987  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3988  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
3989}
3990
3991defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
3992defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
3993defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
3994
3995defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
3996defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
3997defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
3998
3999defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4000defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4001defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
4002
4003class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4004    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4005                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4006                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
4007                []>;
4008multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4009  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4010  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
4011}
4012
4013defm SULD_2D_ARRAY_V4I8_CLAMP
4014  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4015defm SULD_2D_ARRAY_V4I16_CLAMP
4016  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4017defm SULD_2D_ARRAY_V4I32_CLAMP
4018  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
4019
4020defm SULD_2D_ARRAY_V4I8_TRAP
4021  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4022defm SULD_2D_ARRAY_V4I16_TRAP
4023  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4024defm SULD_2D_ARRAY_V4I32_TRAP
4025  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
4026
4027defm SULD_2D_ARRAY_V4I8_ZERO
4028  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4029defm SULD_2D_ARRAY_V4I16_ZERO
4030  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4031defm SULD_2D_ARRAY_V4I32_ZERO
4032  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
4033
4034class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4035    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4036                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4037                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
4038                []>;
4039multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4040  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4041  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
4042}
4043
4044defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4045defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4046defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4047
4048defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4049defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4050defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4051
4052defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4053defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4054defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4055
4056}
4057
4058//-----------------------------------
4059// Texture Query Intrinsics
4060//-----------------------------------
4061
4062let IsSurfTexQuery = true in {
4063def TXQ_CHANNEL_ORDER_R
4064  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4065              "txq.channel_order.b32 \t$d, [$a];",
4066              []>;
4067def TXQ_CHANNEL_ORDER_I
4068  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4069              "txq.channel_order.b32 \t$d, [$a];",
4070              []>;
4071def TXQ_CHANNEL_DATA_TYPE_R
4072  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4073              "txq.channel_data_type.b32 \t$d, [$a];",
4074              []>;
4075def TXQ_CHANNEL_DATA_TYPE_I
4076  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4077              "txq.channel_data_type.b32 \t$d, [$a];",
4078              []>;
4079def TXQ_WIDTH_R
4080  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4081              "txq.width.b32 \t$d, [$a];",
4082              []>;
4083def TXQ_WIDTH_I
4084  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4085              "txq.width.b32 \t$d, [$a];",
4086              []>;
4087def TXQ_HEIGHT_R
4088  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4089              "txq.height.b32 \t$d, [$a];",
4090              []>;
4091def TXQ_HEIGHT_I
4092  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4093              "txq.height.b32 \t$d, [$a];",
4094              []>;
4095def TXQ_DEPTH_R
4096  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4097              "txq.depth.b32 \t$d, [$a];",
4098              []>;
4099def TXQ_DEPTH_I
4100  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4101              "txq.depth.b32 \t$d, [$a];",
4102              []>;
4103def TXQ_ARRAY_SIZE_R
4104  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4105              "txq.array_size.b32 \t$d, [$a];",
4106              []>;
4107def TXQ_ARRAY_SIZE_I
4108  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4109              "txq.array_size.b32 \t$d, [$a];",
4110              []>;
4111def TXQ_NUM_SAMPLES_R
4112  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4113              "txq.num_samples.b32 \t$d, [$a];",
4114              []>;
4115def TXQ_NUM_SAMPLES_I
4116  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4117              "txq.num_samples.b32 \t$d, [$a];",
4118              []>;
4119def TXQ_NUM_MIPMAP_LEVELS_R
4120  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4121              "txq.num_mipmap_levels.b32 \t$d, [$a];",
4122              []>;
4123def TXQ_NUM_MIPMAP_LEVELS_I
4124  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4125              "txq.num_mipmap_levels.b32 \t$d, [$a];",
4126              []>;
4127}
4128
4129def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4130          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4131def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4132          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4133def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4134          (TXQ_WIDTH_R Int64Regs:$a)>;
4135def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4136          (TXQ_HEIGHT_R Int64Regs:$a)>;
4137def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4138          (TXQ_DEPTH_R Int64Regs:$a)>;
4139def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4140          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4141def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4142          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4143def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4144          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4145
4146
4147//-----------------------------------
4148// Surface Query Intrinsics
4149//-----------------------------------
4150
4151let IsSurfTexQuery = true in {
4152def SUQ_CHANNEL_ORDER_R
4153  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4154              "suq.channel_order.b32 \t$d, [$a];",
4155              []>;
4156def SUQ_CHANNEL_ORDER_I
4157  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4158              "suq.channel_order.b32 \t$d, [$a];",
4159              []>;
4160def SUQ_CHANNEL_DATA_TYPE_R
4161  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4162              "suq.channel_data_type.b32 \t$d, [$a];",
4163              []>;
4164def SUQ_CHANNEL_DATA_TYPE_I
4165  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4166              "suq.channel_data_type.b32 \t$d, [$a];",
4167              []>;
4168def SUQ_WIDTH_R
4169  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4170              "suq.width.b32 \t$d, [$a];",
4171              []>;
4172def SUQ_WIDTH_I
4173  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4174              "suq.width.b32 \t$d, [$a];",
4175              []>;
4176def SUQ_HEIGHT_R
4177  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4178              "suq.height.b32 \t$d, [$a];",
4179              []>;
4180def SUQ_HEIGHT_I
4181  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4182              "suq.height.b32 \t$d, [$a];",
4183              []>;
4184def SUQ_DEPTH_R
4185  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4186              "suq.depth.b32 \t$d, [$a];",
4187              []>;
4188def SUQ_DEPTH_I
4189  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4190              "suq.depth.b32 \t$d, [$a];",
4191              []>;
4192def SUQ_ARRAY_SIZE_R
4193  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4194              "suq.array_size.b32 \t$d, [$a];",
4195              []>;
4196def SUQ_ARRAY_SIZE_I
4197  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4198              "suq.array_size.b32 \t$d, [$a];",
4199              []>;
4200}
4201
4202def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4203          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4204def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4205          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4206def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4207          (SUQ_WIDTH_R Int64Regs:$a)>;
4208def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4209          (SUQ_HEIGHT_R Int64Regs:$a)>;
4210def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4211          (SUQ_DEPTH_R Int64Regs:$a)>;
4212def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4213          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4214
4215
4216//===- Handle Query -------------------------------------------------------===//
4217
4218// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
4219def ISTYPEP_SAMPLER
4220  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4221              "istypep.samplerref \t$d, $a;",
4222              [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
4223def ISTYPEP_SURFACE
4224  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4225              "istypep.surfref \t$d, $a;",
4226              [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
4227def ISTYPEP_TEXTURE
4228  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4229              "istypep.texref \t$d, $a;",
4230              [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4231
4232//===- Surface Stores -----------------------------------------------------===//
4233
4234let IsSust = true in {
4235
4236class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4237    : NVPTXInst<(outs),
4238                !con(surf, (ins Int32Regs:$x, intype:$r)),
4239                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4240                []>;
4241multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4242  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4243  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
4244}
4245
4246defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4247defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4248defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4249defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4250
4251defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4252defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4253defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4254defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4255
4256defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4257defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4258defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4259defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4260
4261defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4262defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4263defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4264
4265class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4266    : NVPTXInst<(outs),
4267                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4268                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4269                []>;
4270multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4271  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4272  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
4273}
4274
4275defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4276defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4277defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4278defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4279
4280defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4281defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4282defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4283defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4284
4285defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4286defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4287defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4288defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4289
4290defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4291defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4292defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4293
4294class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4295    : NVPTXInst<(outs),
4296                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4297                                intype:$b, intype:$a)),
4298                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4299                []>;
4300multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4301  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4302  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
4303}
4304
4305defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4306defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4307defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4308
4309defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4310defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4311defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4312
4313defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4314defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4315defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4316
4317defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4318defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4319defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4320
4321class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4322    : NVPTXInst<(outs),
4323                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4324                inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4325                []>;
4326multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4327  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4328  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4329}
4330
4331defm SUST_B_1D_ARRAY_B8_CLAMP
4332  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4333defm SUST_B_1D_ARRAY_B16_CLAMP
4334  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4335defm SUST_B_1D_ARRAY_B32_CLAMP
4336  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4337defm SUST_B_1D_ARRAY_B64_CLAMP
4338  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4339
4340defm SUST_B_1D_ARRAY_B8_TRAP
4341  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4342defm SUST_B_1D_ARRAY_B16_TRAP
4343  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4344defm SUST_B_1D_ARRAY_B32_TRAP
4345  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4346defm SUST_B_1D_ARRAY_B64_TRAP
4347  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4348
4349defm SUST_B_1D_ARRAY_B8_ZERO
4350  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4351defm SUST_B_1D_ARRAY_B16_ZERO
4352  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4353defm SUST_B_1D_ARRAY_B32_ZERO
4354  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4355defm SUST_B_1D_ARRAY_B64_ZERO
4356  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4357
4358defm SUST_P_1D_ARRAY_B8_TRAP
4359  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4360defm SUST_P_1D_ARRAY_B16_TRAP
4361  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4362defm SUST_P_1D_ARRAY_B32_TRAP
4363  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4364
4365class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4366    : NVPTXInst<(outs),
4367                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4368                                intype:$r, intype:$g)),
4369                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4370                []>;
4371multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4372  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4373  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4374}
4375
4376defm SUST_B_1D_ARRAY_V2B8_CLAMP
4377  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4378defm SUST_B_1D_ARRAY_V2B16_CLAMP
4379  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4380defm SUST_B_1D_ARRAY_V2B32_CLAMP
4381  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4382defm SUST_B_1D_ARRAY_V2B64_CLAMP
4383  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4384
4385defm SUST_B_1D_ARRAY_V2B8_TRAP
4386  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4387defm SUST_B_1D_ARRAY_V2B16_TRAP
4388  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4389defm SUST_B_1D_ARRAY_V2B32_TRAP
4390  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4391defm SUST_B_1D_ARRAY_V2B64_TRAP
4392  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4393
4394defm SUST_B_1D_ARRAY_V2B8_ZERO
4395  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4396defm SUST_B_1D_ARRAY_V2B16_ZERO
4397  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4398defm SUST_B_1D_ARRAY_V2B32_ZERO
4399  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4400defm SUST_B_1D_ARRAY_V2B64_ZERO
4401  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4402
4403defm SUST_P_1D_ARRAY_V2B8_TRAP
4404  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4405defm SUST_P_1D_ARRAY_V2B16_TRAP
4406  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4407defm SUST_P_1D_ARRAY_V2B32_TRAP
4408  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4409
4410class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4411    : NVPTXInst<(outs),
4412                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4413                                intype:$r, intype:$g, intype:$b, intype:$a)),
4414                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4415                []>;
4416multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4417  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4418  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4419}
4420
4421defm SUST_B_1D_ARRAY_V4B8_CLAMP
4422  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4423defm SUST_B_1D_ARRAY_V4B16_CLAMP
4424  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4425defm SUST_B_1D_ARRAY_V4B32_CLAMP
4426  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4427
4428defm SUST_B_1D_ARRAY_V4B8_TRAP
4429  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4430defm SUST_B_1D_ARRAY_V4B16_TRAP
4431  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4432defm SUST_B_1D_ARRAY_V4B32_TRAP
4433  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4434
4435defm SUST_B_1D_ARRAY_V4B8_ZERO
4436  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4437defm SUST_B_1D_ARRAY_V4B16_ZERO
4438  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4439defm SUST_B_1D_ARRAY_V4B32_ZERO
4440  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4441
4442defm SUST_P_1D_ARRAY_V4B8_TRAP
4443  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4444defm SUST_P_1D_ARRAY_V4B16_TRAP
4445  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4446defm SUST_P_1D_ARRAY_V4B32_TRAP
4447  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4448
4449class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4450    : NVPTXInst<(outs),
4451                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4452                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4453                []>;
4454multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4455  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4456  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
4457}
4458
4459defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4460defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4461defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4462defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4463
4464defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4465defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4466defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4467defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4468
4469defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4470defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4471defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4472defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4473
4474defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4475defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4476defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4477
4478class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4479    : NVPTXInst<(outs),
4480                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4481                                intype:$r, intype:$g)),
4482                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4483                []>;
4484multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4485  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4486  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
4487}
4488
4489defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4490defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4491defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4492defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4493
4494defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4495defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4496defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4497defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4498
4499defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4500defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4501defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4502defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4503
4504defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4505defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4506defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4507
4508class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4509    : NVPTXInst<(outs),
4510                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4511                                intype:$r, intype:$g, intype:$b, intype:$a)),
4512                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4513                []>;
4514multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4515  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4516  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
4517}
4518
4519defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4520defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4521defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4522
4523defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4524defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4525defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4526
4527defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4528defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4529defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4530
4531defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4532defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4533defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4534
4535class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4536    : NVPTXInst<(outs),
4537                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4538                                intype:$r)),
4539                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4540                []>;
4541multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4542  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4543  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4544}
4545
4546defm SUST_B_2D_ARRAY_B8_CLAMP
4547  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4548defm SUST_B_2D_ARRAY_B16_CLAMP
4549  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4550defm SUST_B_2D_ARRAY_B32_CLAMP
4551  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4552defm SUST_B_2D_ARRAY_B64_CLAMP
4553  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4554
4555defm SUST_B_2D_ARRAY_B8_TRAP
4556  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4557defm SUST_B_2D_ARRAY_B16_TRAP
4558  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4559defm SUST_B_2D_ARRAY_B32_TRAP
4560  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4561defm SUST_B_2D_ARRAY_B64_TRAP
4562  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4563
4564defm SUST_B_2D_ARRAY_B8_ZERO
4565  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4566defm SUST_B_2D_ARRAY_B16_ZERO
4567  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4568defm SUST_B_2D_ARRAY_B32_ZERO
4569  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4570defm SUST_B_2D_ARRAY_B64_ZERO
4571  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4572
4573defm SUST_P_2D_ARRAY_B8_TRAP
4574  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4575defm SUST_P_2D_ARRAY_B16_TRAP
4576  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4577defm SUST_P_2D_ARRAY_B32_TRAP
4578  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4579
4580class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4581    : NVPTXInst<(outs),
4582                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4583                                intype:$r, intype:$g)),
4584                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4585                []>;
4586multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4587  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4588  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4589}
4590
4591defm SUST_B_2D_ARRAY_V2B8_CLAMP
4592  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4593defm SUST_B_2D_ARRAY_V2B16_CLAMP
4594  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4595defm SUST_B_2D_ARRAY_V2B32_CLAMP
4596  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4597defm SUST_B_2D_ARRAY_V2B64_CLAMP
4598  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4599
4600defm SUST_B_2D_ARRAY_V2B8_TRAP
4601  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4602defm SUST_B_2D_ARRAY_V2B16_TRAP
4603  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4604defm SUST_B_2D_ARRAY_V2B32_TRAP
4605  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4606defm SUST_B_2D_ARRAY_V2B64_TRAP
4607  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4608
4609defm SUST_B_2D_ARRAY_V2B8_ZERO
4610  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4611defm SUST_B_2D_ARRAY_V2B16_ZERO
4612  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4613defm SUST_B_2D_ARRAY_V2B32_ZERO
4614  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4615defm SUST_B_2D_ARRAY_V2B64_ZERO
4616  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4617
4618defm SUST_P_2D_ARRAY_V2B8_TRAP
4619  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4620defm SUST_P_2D_ARRAY_V2B16_TRAP
4621  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4622defm SUST_P_2D_ARRAY_V2B32_TRAP
4623  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4624
4625class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4626    : NVPTXInst<(outs),
4627                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4628                                intype:$r, intype:$g, intype:$b, intype:$a)),
4629                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4630                []>;
4631multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4632  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4633  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4634}
4635
4636defm SUST_B_2D_ARRAY_V4B8_CLAMP
4637  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4638defm SUST_B_2D_ARRAY_V4B16_CLAMP
4639  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4640defm SUST_B_2D_ARRAY_V4B32_CLAMP
4641  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4642
4643defm SUST_B_2D_ARRAY_V4B8_TRAP
4644  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4645defm SUST_B_2D_ARRAY_V4B16_TRAP
4646  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4647defm SUST_B_2D_ARRAY_V4B32_TRAP
4648  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4649
4650defm SUST_B_2D_ARRAY_V4B8_ZERO
4651  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4652defm SUST_B_2D_ARRAY_V4B16_ZERO
4653  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4654defm SUST_B_2D_ARRAY_V4B32_ZERO
4655  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4656
4657defm SUST_P_2D_ARRAY_V4B8_TRAP
4658  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4659defm SUST_P_2D_ARRAY_V4B16_TRAP
4660  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4661defm SUST_P_2D_ARRAY_V4B32_TRAP
4662  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4663
4664class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4665    : NVPTXInst<(outs),
4666                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4667                                intype:$r)),
4668                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4669                []>;
4670multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4671  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4672  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
4673}
4674
4675defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4676defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4677defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4678defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4679
4680defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4681defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4682defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4683defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4684
4685defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4686defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4687defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4688defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
4689
4690defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4691defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4692defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4693
4694class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4695    : NVPTXInst<(outs),
4696                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4697                                intype:$r, intype:$g)),
4698                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
4699                []>;
4700multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4701  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4702  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
4703}
4704
4705defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4706defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4707defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4708defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4709
4710defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4711defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4712defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4713defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4714
4715defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4716defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4717defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4718defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
4719
4720defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4721defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4722defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4723
4724class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4725    : NVPTXInst<(outs),
4726                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4727                                intype:$r, intype:$g, intype:$b, intype:$a)),
4728                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
4729                []>;
4730multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
4731  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4732  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
4733}
4734
4735defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
4736defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
4737defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
4738
4739defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
4740defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
4741defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
4742
4743defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
4744defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
4745defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
4746
4747defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
4748defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
4749defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4750
4751}
4752
4753// Surface store instruction patterns
4754// I'm not sure why we can't just include these in the instruction definitions,
4755// but TableGen complains of type errors :(
4756
4757// .clamp variant
4758def : Pat<(int_nvvm_sust_b_1d_i8_clamp
4759           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4760          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4761
4762def : Pat<(int_nvvm_sust_b_1d_i16_clamp
4763           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4764          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4765
4766def : Pat<(int_nvvm_sust_b_1d_i32_clamp
4767           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4768          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
4769
4770def : Pat<(int_nvvm_sust_b_1d_i64_clamp
4771           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4772          (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
4773
4774def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
4775           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4776          (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4777           Int16Regs:$r, Int16Regs:$g)>;
4778
4779def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
4780           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4781          (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4782           Int16Regs:$r, Int16Regs:$g)>;
4783
4784def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
4785           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4786          (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4787           Int32Regs:$r, Int32Regs:$g)>;
4788
4789def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
4790           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4791          (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4792           Int64Regs:$r, Int64Regs:$g)>;
4793
4794def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
4795           Int64Regs:$s, Int32Regs:$x,
4796           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4797          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4798           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4799
4800def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
4801           Int64Regs:$s, Int32Regs:$x,
4802           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4803          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4804           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4805
4806def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
4807           Int64Regs:$s, Int32Regs:$x,
4808           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4809          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4810           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4811
4812
4813
4814def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
4815           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4816          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4817           Int16Regs:$r)>;
4818
4819def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
4820           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4821          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4822           Int16Regs:$r)>;
4823
4824def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
4825           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
4826          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4827           Int32Regs:$r)>;
4828
4829def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
4830           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
4831          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4832           Int64Regs:$r)>;
4833
4834def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
4835          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4836          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4837           Int16Regs:$r, Int16Regs:$g)>;
4838
4839def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
4840          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4841          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4842           Int16Regs:$r, Int16Regs:$g)>;
4843
4844def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
4845          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4846          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4847           Int32Regs:$r, Int32Regs:$g)>;
4848
4849def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
4850          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4851          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4852           Int64Regs:$r, Int64Regs:$g)>;
4853
4854def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
4855           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4856           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4857          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4858           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4859
4860def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
4861           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4862           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4863          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4864           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4865
4866def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
4867           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4868           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4869          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4870           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4871
4872
4873
4874def : Pat<(int_nvvm_sust_b_2d_i8_clamp
4875           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4876          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4877           Int16Regs:$r)>;
4878
4879def : Pat<(int_nvvm_sust_b_2d_i16_clamp
4880           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4881          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4882           Int16Regs:$r)>;
4883
4884def : Pat<(int_nvvm_sust_b_2d_i32_clamp
4885           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4886          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4887           Int32Regs:$r)>;
4888
4889def : Pat<(int_nvvm_sust_b_2d_i64_clamp
4890           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4891          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4892           Int64Regs:$r)>;
4893
4894def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
4895          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
4896          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4897           Int16Regs:$r, Int16Regs:$g)>;
4898
4899def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
4900          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
4901          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4902           Int16Regs:$r, Int16Regs:$g)>;
4903
4904def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
4905          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
4906          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4907           Int32Regs:$r, Int32Regs:$g)>;
4908
4909def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
4910          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
4911          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4912           Int64Regs:$r, Int64Regs:$g)>;
4913
4914def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
4915           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4916           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4917          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4918           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4919
4920def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
4921           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4922           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4923          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4924           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4925
4926def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
4927           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4928           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4929          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4930           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4931
4932
4933
4934def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
4935          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4936          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
4937           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4938           Int16Regs:$r)>;
4939
4940def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
4941          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4942          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
4943           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4944           Int16Regs:$r)>;
4945
4946def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
4947          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4948          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
4949           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4950           Int32Regs:$r)>;
4951
4952def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
4953          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4954          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
4955           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4956           Int64Regs:$r)>;
4957
4958def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
4959           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4960           Int16Regs:$r, Int16Regs:$g),
4961          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4962           Int32Regs:$x, Int32Regs:$y,
4963           Int16Regs:$r, Int16Regs:$g)>;
4964
4965def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
4966           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4967           Int16Regs:$r, Int16Regs:$g),
4968          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4969           Int32Regs:$x, Int32Regs:$y,
4970           Int16Regs:$r, Int16Regs:$g)>;
4971
4972def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
4973           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4974           Int32Regs:$g),
4975          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4976           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
4977
4978def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
4979           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4980           Int64Regs:$g),
4981          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4982           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
4983
4984def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
4985           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4986           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4987          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
4988           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4989           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4990
4991def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
4992           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4993           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4994          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
4995           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4996           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4997
4998def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
4999           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5000           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5001          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
5002           Int32Regs:$x, Int32Regs:$y,
5003           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5004
5005
5006
5007def : Pat<(int_nvvm_sust_b_3d_i8_clamp
5008           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5009           Int16Regs:$r),
5010          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
5011           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5012           Int16Regs:$r)>;
5013
5014def : Pat<(int_nvvm_sust_b_3d_i16_clamp
5015           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5016           Int16Regs:$r),
5017          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
5018           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5019           Int16Regs:$r)>;
5020
5021def : Pat<(int_nvvm_sust_b_3d_i32_clamp
5022           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5023           Int32Regs:$r),
5024          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
5025           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5026           Int32Regs:$r)>;
5027
5028def : Pat<(int_nvvm_sust_b_3d_i64_clamp
5029           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5030           Int64Regs:$r),
5031          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5032           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5033           Int64Regs:$r)>;
5034
5035def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5036           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5037           Int16Regs:$r, Int16Regs:$g),
5038          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5039           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5040           Int16Regs:$r, Int16Regs:$g)>;
5041
5042def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5043           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5044           Int16Regs:$r, Int16Regs:$g),
5045          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5046           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5047           Int16Regs:$r, Int16Regs:$g)>;
5048
5049def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5050           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5051           Int32Regs:$r, Int32Regs:$g),
5052          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5053           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5054           Int32Regs:$r, Int32Regs:$g)>;
5055
5056def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5057           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5058           Int64Regs:$r, Int64Regs:$g),
5059          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5060           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5061           Int64Regs:$r, Int64Regs:$g)>;
5062
5063def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5064           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5065           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5066          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5067           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5068           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5069
5070def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5071           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5072           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5073          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5074           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5075           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5076
5077def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5078           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5079           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5080          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5081           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5082           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5083
5084
5085// .trap variant
5086def : Pat<(int_nvvm_sust_b_1d_i8_trap
5087           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5088          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5089
5090def : Pat<(int_nvvm_sust_b_1d_i16_trap
5091           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5092          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5093
5094def : Pat<(int_nvvm_sust_b_1d_i32_trap
5095           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5096          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5097
5098def : Pat<(int_nvvm_sust_b_1d_i64_trap
5099           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5100          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5101
5102def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5103           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5104          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5105           Int16Regs:$r, Int16Regs:$g)>;
5106
5107def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5108           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5109          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5110           Int16Regs:$r, Int16Regs:$g)>;
5111
5112def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5113           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5114          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5115           Int32Regs:$r, Int32Regs:$g)>;
5116
5117def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5118           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5119          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5120           Int64Regs:$r, Int64Regs:$g)>;
5121
5122def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5123           Int64Regs:$s, Int32Regs:$x,
5124           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5125          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5126           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5127
5128def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5129           Int64Regs:$s, Int32Regs:$x,
5130           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5131          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5132           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5133
5134def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5135           Int64Regs:$s, Int32Regs:$x,
5136           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5137          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5138           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5139
5140
5141
5142def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5143           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5144          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5145           Int16Regs:$r)>;
5146
5147def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5148           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5149          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5150           Int16Regs:$r)>;
5151
5152def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5153           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5154          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5155           Int32Regs:$r)>;
5156
5157def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5158           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5159          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5160           Int64Regs:$r)>;
5161
5162def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5163          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5164          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5165           Int16Regs:$r, Int16Regs:$g)>;
5166
5167def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5168          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5169          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5170           Int16Regs:$r, Int16Regs:$g)>;
5171
5172def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5173          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5174          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5175           Int32Regs:$r, Int32Regs:$g)>;
5176
5177def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5178          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5179          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5180           Int64Regs:$r, Int64Regs:$g)>;
5181
5182def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5183           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5184           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5185          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5186           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5187
5188def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5189           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5190           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5191          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5192           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5193
5194def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5195           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5196           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5197          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5198           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5199
5200
5201
5202def : Pat<(int_nvvm_sust_b_2d_i8_trap
5203           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5204          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5205           Int16Regs:$r)>;
5206
5207def : Pat<(int_nvvm_sust_b_2d_i16_trap
5208           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5209          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5210           Int16Regs:$r)>;
5211
5212def : Pat<(int_nvvm_sust_b_2d_i32_trap
5213           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5214          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5215           Int32Regs:$r)>;
5216
5217def : Pat<(int_nvvm_sust_b_2d_i64_trap
5218           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5219          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5220           Int64Regs:$r)>;
5221
5222def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
5223          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5224          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5225           Int16Regs:$r, Int16Regs:$g)>;
5226
5227def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
5228          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5229          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5230           Int16Regs:$r, Int16Regs:$g)>;
5231
5232def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
5233          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5234          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5235           Int32Regs:$r, Int32Regs:$g)>;
5236
5237def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
5238          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5239          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5240           Int64Regs:$r, Int64Regs:$g)>;
5241
5242def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
5243           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5244           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5245          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5246           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5247
5248def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
5249           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5250           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5251          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5252           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5253
5254def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
5255           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5256           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5257          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5258           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5259
5260
5261
5262def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
5263          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5264          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5265           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5266           Int16Regs:$r)>;
5267
5268def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
5269          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5270          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5271           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5272           Int16Regs:$r)>;
5273
5274def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
5275          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5276          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5277           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5278           Int32Regs:$r)>;
5279
5280def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
5281          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5282          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
5283           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5284           Int64Regs:$r)>;
5285
5286def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
5287           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5288           Int16Regs:$r, Int16Regs:$g),
5289          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5290           Int32Regs:$x, Int32Regs:$y,
5291           Int16Regs:$r, Int16Regs:$g)>;
5292
5293def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
5294           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5295           Int16Regs:$r, Int16Regs:$g),
5296          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5297           Int32Regs:$x, Int32Regs:$y,
5298           Int16Regs:$r, Int16Regs:$g)>;
5299
5300def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
5301           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5302           Int32Regs:$g),
5303          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5304           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5305
5306def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
5307           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5308           Int64Regs:$g),
5309          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
5310           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5311
5312def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
5313           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5314           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5315          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5316           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5317           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5318
5319def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
5320           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5321           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5322          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5323           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5324           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5325
5326def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
5327           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5328           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5329          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5330           Int32Regs:$x, Int32Regs:$y,
5331           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5332
5333
5334
5335def : Pat<(int_nvvm_sust_b_3d_i8_trap
5336           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5337           Int16Regs:$r),
5338          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
5339           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5340           Int16Regs:$r)>;
5341
5342def : Pat<(int_nvvm_sust_b_3d_i16_trap
5343           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5344           Int16Regs:$r),
5345          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
5346           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5347           Int16Regs:$r)>;
5348
5349def : Pat<(int_nvvm_sust_b_3d_i32_trap
5350           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5351           Int32Regs:$r),
5352          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
5353           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5354           Int32Regs:$r)>;
5355
5356def : Pat<(int_nvvm_sust_b_3d_i64_trap
5357           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5358           Int64Regs:$r),
5359          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
5360           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5361           Int64Regs:$r)>;
5362
5363def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
5364           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5365           Int16Regs:$r, Int16Regs:$g),
5366          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
5367           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5368           Int16Regs:$r, Int16Regs:$g)>;
5369
5370def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
5371           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5372           Int16Regs:$r, Int16Regs:$g),
5373          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
5374           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5375           Int16Regs:$r, Int16Regs:$g)>;
5376
5377def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
5378           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5379           Int32Regs:$r, Int32Regs:$g),
5380          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
5381           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5382           Int32Regs:$r, Int32Regs:$g)>;
5383
5384def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
5385           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5386           Int64Regs:$r, Int64Regs:$g),
5387          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
5388           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5389           Int64Regs:$r, Int64Regs:$g)>;
5390
5391def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
5392           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5393           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5394          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
5395           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5396           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5397
5398def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
5399           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5400           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5401          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
5402           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5403           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5404
5405def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
5406           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5407           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5408          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
5409           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5410           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5411
5412
5413// .zero variant
5414def : Pat<(int_nvvm_sust_b_1d_i8_zero
5415           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5416          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5417
5418def : Pat<(int_nvvm_sust_b_1d_i16_zero
5419           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5420          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5421
5422def : Pat<(int_nvvm_sust_b_1d_i32_zero
5423           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5424          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5425
5426def : Pat<(int_nvvm_sust_b_1d_i64_zero
5427           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5428          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5429
5430def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
5431           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5432          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5433           Int16Regs:$r, Int16Regs:$g)>;
5434
5435def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
5436           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5437          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5438           Int16Regs:$r, Int16Regs:$g)>;
5439
5440def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
5441           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5442          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5443           Int32Regs:$r, Int32Regs:$g)>;
5444
5445def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
5446           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5447          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
5448           Int64Regs:$r, Int64Regs:$g)>;
5449
5450def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
5451           Int64Regs:$s, Int32Regs:$x,
5452           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5453          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
5454           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5455
5456def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
5457           Int64Regs:$s, Int32Regs:$x,
5458           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5459          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
5460           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5461
5462def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
5463           Int64Regs:$s, Int32Regs:$x,
5464           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5465          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
5466           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5467
5468
5469
5470def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
5471           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5472          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5473           Int16Regs:$r)>;
5474
5475def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
5476           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5477          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5478           Int16Regs:$r)>;
5479
5480def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
5481           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5482          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5483           Int32Regs:$r)>;
5484
5485def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
5486           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5487          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5488           Int64Regs:$r)>;
5489
5490def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
5491          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5492          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5493           Int16Regs:$r, Int16Regs:$g)>;
5494
5495def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
5496          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5497          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5498           Int16Regs:$r, Int16Regs:$g)>;
5499
5500def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
5501          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5502          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5503           Int32Regs:$r, Int32Regs:$g)>;
5504
5505def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
5506          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5507          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5508           Int64Regs:$r, Int64Regs:$g)>;
5509
5510def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
5511           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5512           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5513          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5514           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5515
5516def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
5517           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5518           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5519          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5520           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5521
5522def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
5523           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5524           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5525          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5526           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5527
5528
5529
5530def : Pat<(int_nvvm_sust_b_2d_i8_zero
5531           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5532          (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5533           Int16Regs:$r)>;
5534
5535def : Pat<(int_nvvm_sust_b_2d_i16_zero
5536           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5537          (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5538           Int16Regs:$r)>;
5539
5540def : Pat<(int_nvvm_sust_b_2d_i32_zero
5541           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5542          (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5543           Int32Regs:$r)>;
5544
5545def : Pat<(int_nvvm_sust_b_2d_i64_zero
5546           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5547          (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5548           Int64Regs:$r)>;
5549
5550def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
5551          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5552          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5553           Int16Regs:$r, Int16Regs:$g)>;
5554
5555def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
5556          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5557          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5558           Int16Regs:$r, Int16Regs:$g)>;
5559
5560def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
5561          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5562          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5563           Int32Regs:$r, Int32Regs:$g)>;
5564
5565def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
5566          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5567          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5568           Int64Regs:$r, Int64Regs:$g)>;
5569
5570def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
5571           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5572           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5573          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5574           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5575
5576def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
5577           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5578           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5579          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5580           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5581
5582def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
5583           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5584           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5585          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5586           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5587
5588
5589
5590def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
5591          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5592          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
5593           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5594           Int16Regs:$r)>;
5595
5596def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
5597          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5598          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
5599           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5600           Int16Regs:$r)>;
5601
5602def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
5603          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5604          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
5605           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5606           Int32Regs:$r)>;
5607
5608def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
5609          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5610          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
5611           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5612           Int64Regs:$r)>;
5613
5614def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
5615           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5616           Int16Regs:$r, Int16Regs:$g),
5617          (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
5618           Int32Regs:$x, Int32Regs:$y,
5619           Int16Regs:$r, Int16Regs:$g)>;
5620
5621def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
5622           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5623           Int16Regs:$r, Int16Regs:$g),
5624          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
5625           Int32Regs:$x, Int32Regs:$y,
5626           Int16Regs:$r, Int16Regs:$g)>;
5627
5628def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
5629           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5630           Int32Regs:$g),
5631          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5632           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5633
5634def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
5635           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
5636           Int64Regs:$g),
5637          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
5638           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
5639
5640def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
5641           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5642           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5643          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
5644           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5645           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5646
5647def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
5648           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5649           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5650          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
5651           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5652           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5653
5654def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
5655           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5656           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5657          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
5658           Int32Regs:$x, Int32Regs:$y,
5659           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5660
5661
5662
5663def : Pat<(int_nvvm_sust_b_3d_i8_zero
5664           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5665           Int16Regs:$r),
5666          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
5667           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5668           Int16Regs:$r)>;
5669
5670def : Pat<(int_nvvm_sust_b_3d_i16_zero
5671           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5672           Int16Regs:$r),
5673          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
5674           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5675           Int16Regs:$r)>;
5676
5677def : Pat<(int_nvvm_sust_b_3d_i32_zero
5678           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5679           Int32Regs:$r),
5680          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
5681           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5682           Int32Regs:$r)>;
5683
5684def : Pat<(int_nvvm_sust_b_3d_i64_zero
5685           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5686           Int64Regs:$r),
5687          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
5688           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5689           Int64Regs:$r)>;
5690
5691def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
5692           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5693           Int16Regs:$r, Int16Regs:$g),
5694          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
5695           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5696           Int16Regs:$r, Int16Regs:$g)>;
5697
5698def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
5699           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5700           Int16Regs:$r, Int16Regs:$g),
5701          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
5702           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5703           Int16Regs:$r, Int16Regs:$g)>;
5704
5705def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
5706           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5707           Int32Regs:$r, Int32Regs:$g),
5708          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
5709           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5710           Int32Regs:$r, Int32Regs:$g)>;
5711
5712def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
5713           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5714           Int64Regs:$r, Int64Regs:$g),
5715          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
5716           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5717           Int64Regs:$r, Int64Regs:$g)>;
5718
5719def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
5720           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5721           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5722          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
5723           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5724           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5725
5726def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
5727           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5728           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5729          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
5730           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5731           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5732
5733def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
5734           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5735           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5736          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
5737           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5738           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5739
5740
5741
5742
5743def : Pat<(int_nvvm_sust_p_1d_i8_trap
5744           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5745          (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5746
5747def : Pat<(int_nvvm_sust_p_1d_i16_trap
5748           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5749          (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5750
5751def : Pat<(int_nvvm_sust_p_1d_i32_trap
5752           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5753          (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5754
5755def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
5756           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5757          (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5758           Int16Regs:$r, Int16Regs:$g)>;
5759
5760def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
5761           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5762          (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5763           Int16Regs:$r, Int16Regs:$g)>;
5764
5765def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
5766           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5767          (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5768           Int32Regs:$r, Int32Regs:$g)>;
5769
5770def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
5771           Int64Regs:$s, Int32Regs:$x,
5772           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5773          (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5774           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5775
5776def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
5777           Int64Regs:$s, Int32Regs:$x,
5778           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5779          (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5780           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5781
5782def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
5783           Int64Regs:$s, Int32Regs:$x,
5784           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5785          (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5786           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5787
5788
5789
5790def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
5791           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5792          (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5793           Int16Regs:$r)>;
5794
5795def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
5796           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5797          (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5798           Int16Regs:$r)>;
5799
5800def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
5801           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5802          (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5803           Int32Regs:$r)>;
5804
5805def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
5806          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5807          (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5808           Int16Regs:$r, Int16Regs:$g)>;
5809
5810def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
5811          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5812          (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5813           Int16Regs:$r, Int16Regs:$g)>;
5814
5815def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
5816          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5817          (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5818           Int32Regs:$r, Int32Regs:$g)>;
5819
5820def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
5821           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5822           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5823          (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5824           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5825
5826def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
5827           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5828           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5829          (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5830           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5831
5832def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
5833           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5834           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5835          (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5836           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5837
5838
5839
5840def : Pat<(int_nvvm_sust_p_2d_i8_trap
5841           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5842          (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5843           Int16Regs:$r)>;
5844
5845def : Pat<(int_nvvm_sust_p_2d_i16_trap
5846           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5847          (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5848           Int16Regs:$r)>;
5849
5850def : Pat<(int_nvvm_sust_p_2d_i32_trap
5851           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5852          (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5853           Int32Regs:$r)>;
5854
5855def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
5856          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5857          (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5858           Int16Regs:$r, Int16Regs:$g)>;
5859
5860def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
5861          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5862          (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5863           Int16Regs:$r, Int16Regs:$g)>;
5864
5865def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
5866          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5867          (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5868           Int32Regs:$r, Int32Regs:$g)>;
5869
5870def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
5871           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5872           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5873          (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5874           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5875
5876def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
5877           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5878           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5879          (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5880           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5881
5882def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
5883           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5884           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5885          (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
5886           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5887
5888
5889
5890def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
5891          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5892          (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
5893           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5894           Int16Regs:$r)>;
5895
5896def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
5897          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5898          (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
5899           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5900           Int16Regs:$r)>;
5901
5902def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
5903          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5904          (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
5905           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5906           Int32Regs:$r)>;
5907
5908def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
5909           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5910           Int16Regs:$r, Int16Regs:$g),
5911          (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
5912           Int32Regs:$x, Int32Regs:$y,
5913           Int16Regs:$r, Int16Regs:$g)>;
5914
5915def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
5916           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5917           Int16Regs:$r, Int16Regs:$g),
5918          (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
5919           Int32Regs:$x, Int32Regs:$y,
5920           Int16Regs:$r, Int16Regs:$g)>;
5921
5922def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
5923           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
5924           Int32Regs:$g),
5925          (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5926           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
5927
5928def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
5929           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5930           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5931          (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
5932           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5933           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5934
5935def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
5936           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5937           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5938          (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
5939           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5940           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5941
5942def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
5943           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
5944           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5945          (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
5946           Int32Regs:$x, Int32Regs:$y,
5947           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5948
5949
5950
5951def : Pat<(int_nvvm_sust_p_3d_i8_trap
5952           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5953           Int16Regs:$r),
5954          (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
5955           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5956           Int16Regs:$r)>;
5957
5958def : Pat<(int_nvvm_sust_p_3d_i16_trap
5959           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5960           Int16Regs:$r),
5961          (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
5962           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5963           Int16Regs:$r)>;
5964
5965def : Pat<(int_nvvm_sust_p_3d_i32_trap
5966           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5967           Int32Regs:$r),
5968          (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
5969           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5970           Int32Regs:$r)>;
5971
5972def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
5973           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5974           Int16Regs:$r, Int16Regs:$g),
5975          (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
5976           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5977           Int16Regs:$r, Int16Regs:$g)>;
5978
5979def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
5980           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5981           Int16Regs:$r, Int16Regs:$g),
5982          (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
5983           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5984           Int16Regs:$r, Int16Regs:$g)>;
5985
5986def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
5987           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5988           Int32Regs:$r, Int32Regs:$g),
5989          (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
5990           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5991           Int32Regs:$r, Int32Regs:$g)>;
5992
5993def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
5994           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5995           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5996          (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
5997           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5998           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5999
6000def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
6001           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6002           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6003          (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
6004           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6005           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
6006
6007def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
6008           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6009           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6010          (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
6011           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
6012           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
6013
6014//-----------------------------------
6015// Read Special Registers
6016//-----------------------------------
6017
6018class PTX_READ_SREG_R64<string regname, Intrinsic intop>
6019  : NVPTXInst<(outs Int64Regs:$d), (ins),
6020              !strconcat("mov.u64 \t$d, %", regname, ";"),
6021              [(set Int64Regs:$d, (intop))]>;
6022
6023class PTX_READ_SREG_R32<string regname, Intrinsic intop>
6024  : NVPTXInst<(outs Int32Regs:$d), (ins),
6025              !strconcat("mov.u32 \t$d, %", regname, ";"),
6026              [(set Int32Regs:$d, (intop))]>;
6027
6028// TODO Add read vector-version of special registers
6029
6030def INT_PTX_SREG_TID_X :
6031    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
6032def INT_PTX_SREG_TID_Y :
6033    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
6034def INT_PTX_SREG_TID_Z :
6035    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
6036def INT_PTX_SREG_TID_W :
6037    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
6038
6039def INT_PTX_SREG_NTID_X :
6040    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
6041def INT_PTX_SREG_NTID_Y :
6042    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
6043def INT_PTX_SREG_NTID_Z :
6044    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
6045def INT_PTX_SREG_NTID_W :
6046    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
6047
6048def INT_PTX_SREG_LANEID :
6049    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
6050def INT_PTX_SREG_WARPID :
6051    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
6052def INT_PTX_SREG_NWARPID :
6053    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6054
6055def INT_PTX_SREG_CTAID_X :
6056    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
6057def INT_PTX_SREG_CTAID_Y :
6058    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
6059def INT_PTX_SREG_CTAID_Z :
6060    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
6061def INT_PTX_SREG_CTAID_W :
6062    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
6063
6064def INT_PTX_SREG_NCTAID_X :
6065    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
6066def INT_PTX_SREG_NCTAID_Y :
6067    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
6068def INT_PTX_SREG_NCTAID_Z :
6069    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
6070def INT_PTX_SREG_NCTAID_W :
6071    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
6072
6073def INT_PTX_SREG_SMID :
6074    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
6075def INT_PTX_SREG_NSMID :
6076    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
6077def INT_PTX_SREG_GRIDID :
6078    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6079
6080def INT_PTX_SREG_LANEMASK_EQ :
6081    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
6082def INT_PTX_SREG_LANEMASK_LE :
6083    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
6084def INT_PTX_SREG_LANEMASK_LT :
6085    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
6086def INT_PTX_SREG_LANEMASK_GE :
6087    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
6088def INT_PTX_SREG_LANEMASK_GT :
6089    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6090
6091def INT_PTX_SREG_CLOCK :
6092    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
6093def INT_PTX_SREG_CLOCK64 :
6094    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6095
6096def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
6097def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
6098def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
6099def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6100
6101// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
6102// handle the constant.
6103def INT_PTX_SREG_WARPSIZE :
6104    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
6105              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6106
6107// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
6108// In addition to target-independent fields provided by WMMA_REGS, it adds
6109// the fields commonly used to implement specific PTX instruction -- register
6110// types and names, constraints, parts of assembly, etc.
6111class WMMA_REGINFO<WMMA_REGS r, string op>
6112      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
6113  // NVPTX register types used to carry fragment data.
6114  NVPTXRegClass regclass = !cond(
6115    !eq(ptx_elt_type, "f16") : Float16x2Regs,
6116    !eq(ptx_elt_type, "f32") : Float32Regs,
6117    !eq(ptx_elt_type, "f64") : Float64Regs,
6118    !eq(ptx_elt_type, "bf16") : Int32Regs,
6119    !eq(ptx_elt_type, "tf32") : Int32Regs,
6120    !eq(ptx_elt_type, "s32") : Int32Regs,
6121    !eq(ptx_elt_type, "b16") : Int32Regs,
6122    !eq(ptx_elt_type, "s8") : Int32Regs,
6123    !eq(ptx_elt_type, "u8") : Int32Regs,
6124    !eq(ptx_elt_type, "s4") : Int32Regs,
6125    !eq(ptx_elt_type, "u4") : Int32Regs,
6126    !eq(ptx_elt_type, "b1") : Int32Regs);
6127
6128  // Instruction input/output arguments for the fragment.
6129  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));
6130
6131  // List of register names for the fragment -- ["ra0", "ra1",...]
6132  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
6133
6134  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
6135  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";
6136
6137  // Predicates for particular fragment variant. Technically those are
6138  // per-instruction predicates, but currently all fragments that can be used in
6139  // a given instruction are subject to the same constraints, so an instruction
6140  // can use predicates from any of its fragments. If/when this is no
6141  // longer the case, we can concat all per-fragment predicates to enforce that
6142  // all fragments of the instruction are viable.
6143  list<Predicate> Predicates = !cond(
6144    // fp16 -> fp16/fp32 @ m16n16k16
6145    !and(!eq(geom, "m16n16k16"),
6146         !or(!eq(ptx_elt_type, "f16"),
6147             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
6148
6149    !and(!eq(geom,"m8n8k4"),
6150         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],
6151
6152    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
6153    !and(!or(!eq(geom, "m8n32k16"),
6154             !eq(geom, "m32n8k16")),
6155         !or(!eq(ptx_elt_type, "f16"),
6156             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
6157
6158    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
6159    !and(!or(!eq(geom,"m16n16k16"),
6160             !eq(geom,"m8n32k16"),
6161             !eq(geom,"m32n8k16")),
6162         !or(!eq(ptx_elt_type, "u8"),
6163             !eq(ptx_elt_type, "s8"),
6164             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
6165
6166    !and(!or(!eq(geom,"m16n16k16"),
6167             !eq(geom,"m8n32k16"),
6168             !eq(geom,"m32n8k16")),
6169         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],
6170
6171    !and(!eq(geom,"m16n16k8"),
6172         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],
6173
6174    !and(!eq(geom,"m16n16k8"),
6175         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],
6176
6177    // b1 -> s32 @ m8n8k128(b1)
6178    !and(!ne(op,"mma"),
6179         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],
6180
6181    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
6182    !and(!ne(op,"mma"),
6183         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
6184
6185    !or(!eq(geom,"m16n8k8"),
6186        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],
6187
6188    !and(!ne(ptx_elt_type,"f64"),
6189         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],
6190
6191    // mma m8n8k32 requires higher PTX version
6192    !and(!eq(op,"mma"),
6193         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],
6194
6195    !and(!eq(ptx_elt_type,"f64"),
6196         !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70],
6197
6198    !and(!eq(op,"mma"),
6199         !or(!eq(geom, "m16n8k16"),
6200             !eq(geom, "m16n8k4"),
6201             !eq(geom, "m16n8k32"),
6202             !eq(geom, "m16n8k64"),
6203             !eq(geom, "m8n8k128"),
6204             !eq(geom, "m16n8k128"),
6205             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
6206
6207    !and(!eq(op,"ldmatrix"),
6208         !eq(ptx_elt_type,"b16"),
6209         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
6210
6211  // template DAGs for instruction inputs/output.
6212  dag Outs = !dag(outs, ptx_regs, reg_names);
6213  dag Ins = !dag(ins, ptx_regs, reg_names);
6214}
6215
6216// Convert dag of arguments into a dag to match given intrinsic.
6217class BuildPatternI<Intrinsic Intr, dag Ins> {
6218  // Build a dag pattern that matches the intrinsic call.
6219  dag ret = !foreach(tmp, Ins,
6220                          !subst(imem, ADDRvar,
6221                          !subst(MEMri64, ADDRri64,
6222                          !subst(MEMri, ADDRri,
6223                          !subst(ins, Intr, tmp)))));
6224}
6225
6226// Same as above, but uses PatFrag instead of an Intrinsic.
6227class BuildPatternPF<PatFrag Intr, dag Ins> {
6228  // Build a dag pattern that matches the intrinsic call.
6229  dag ret = !foreach(tmp, Ins,
6230                          !subst(imem, ADDRvar,
6231                          !subst(MEMri64, ADDRri64,
6232                          !subst(MEMri, ADDRri,
6233                          !subst(ins, Intr, tmp)))));
6234}
6235
6236// Common WMMA-related fields used for building patterns for all MMA instructions.
6237class WMMA_INSTR<string _Intr, list<dag> _Args>
6238  : NVPTXInst<(outs), (ins), "?", []> {
6239  Intrinsic Intr = !cast<Intrinsic>(_Intr);
6240  // Concatenate all arguments into a single dag.
6241  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
6242  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
6243  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
6244}
6245
6246//
6247// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6248//
6249
6250class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
6251                DAGOperand SrcOp>
6252  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
6253                              [!con((ins SrcOp:$src),
6254                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6255    Requires<Frag.Predicates> {
6256  // Load/store intrinsics are overloaded on pointer's address space.
6257  // To match the right intrinsic, we need to build AS-constrained PatFrag.
6258  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6259  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
6260  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
6261  // Build PatFrag that only matches particular address space.
6262  PatFrag IntrFrag = PatFrag<PFOperands,
6263                             PFOperandsIntr,
6264                             !cond(!eq(Space, ".shared"): AS_match.shared,
6265                                   !eq(Space, ".global"): AS_match.global,
6266                                   true: AS_match.generic)>;
6267  // Build AS-constrained pattern.
6268  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6269
6270  let OutOperandList = Frag.Outs;
6271  let InOperandList = !con(Args, (ins MmaCode:$ptx));
6272  let AsmString = "wmma.load."
6273                  # Frag.frag
6274                  # ".sync"
6275                  # "${ptx:aligned}"
6276                  # "." # Layout
6277                  # "." # Frag.geom
6278                  # Space
6279                  # "." # Frag.ptx_elt_type # " \t"
6280                  # Frag.regstring
6281                  # ", [$src]"
6282                  # !if(WithStride, ", $ldm", "")
6283                  # ";";
6284}
6285
6286//
6287// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
6288//
6289class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
6290                   bit WithStride, DAGOperand DstOp>
6291  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
6292               [!con((ins DstOp:$dst),
6293                     Frag.Ins,
6294                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
6295    Requires<Frag.Predicates> {
6296
6297  // Load/store intrinsics are overloaded on pointer's address space.
6298  // To match the right intrinsic, we need to build AS-constrained PatFrag.
6299  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
6300  dag PFOperands = !con((ops node:$dst),
6301                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
6302                        !if(WithStride, (ops node:$ldm), (ops)));
6303  // Build PatFrag that only matches particular address space.
6304  PatFrag IntrFrag = PatFrag<PFOperands,
6305                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
6306                             !cond(!eq(Space, ".shared"): AS_match.shared,
6307                                   !eq(Space, ".global"): AS_match.global,
6308                                   true: AS_match.generic)>;
6309  // Build AS-constrained pattern.
6310  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6311
6312  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
6313  let OutOperandList = (outs);
6314  let AsmString = "wmma.store.d.sync"
6315                  # "${ptx:aligned}"
6316                  # "." # Layout
6317                  # "." # Frag.geom
6318                  # Space
6319                  # "." # Frag.ptx_elt_type
6320                  # " \t[$dst],"
6321                  # Frag.regstring
6322                  # !if(WithStride, ", $ldm", "")
6323                  # ";";
6324}
6325
6326// Create all load/store variants
6327defset list<WMMA_INSTR> MMA_LDSTs  = {
6328  foreach layout = ["row", "col"] in {
6329    foreach stride = [false, true] in {
6330      foreach space = [".global", ".shared", ""] in {
6331        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6332          foreach frag = NVVM_MMA_OPS.all_ld_ops in
6333            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6334              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
6335          foreach frag = NVVM_MMA_OPS.all_st_ops in
6336            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6337              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
6338        } // addr
6339      } // space
6340    } // stride
6341  } // layout
6342} // defset
6343
6344// B1 instruction variants need extra constraints.
6345class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
6346  string Op = b1op;
6347  WMMA_REGINFO Frag = FragA;
6348  list<Predicate> ret = !listconcat(
6349    FragA.Predicates,
6350    !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
6351  );
6352}
6353// WMMA.MMA
6354class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6355               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6356               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6357  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
6358                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
6359    // Requires does not seem to have effect on Instruction w/o Patterns.
6360    // We set it here anyways and propagate to the Pat<> we construct below.
6361    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6362  let OutOperandList = FragD.Outs;
6363  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
6364  string TypeList = !cond(
6365    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6366                                     # "." # FragC.ptx_elt_type,
6367    1: "." # FragD.ptx_elt_type
6368       # "." # FragA.ptx_elt_type
6369       # "." # FragB.ptx_elt_type
6370       # "." # FragC.ptx_elt_type,
6371  );
6372  let AsmString = "wmma.mma"
6373                  # b1op
6374                  # ".sync"
6375                  # "${ptx:aligned}"
6376                  # "." # ALayout
6377                  # "." # BLayout
6378                  # "." # FragA.geom
6379                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
6380                  # TypeList
6381                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
6382                  # FragD.regstring # ",\n\t\t"
6383                  # FragA.regstring # ",\n\t\t"
6384                  # FragB.regstring # ",\n\t\t"
6385                  # FragC.regstring # ";";
6386}
6387
6388defset list<WMMA_INSTR> WMMAs  = {
6389  foreach layout_a = ["row", "col"] in {
6390    foreach layout_b = ["row", "col"] in {
6391      foreach satf = [0, 1] in {
6392        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6393          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6394            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6395              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
6396                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6397                              WMMA_REGINFO<op[1], "wmma.mma">,
6398                              WMMA_REGINFO<op[2], "wmma.mma">,
6399                              WMMA_REGINFO<op[3], "wmma.mma">,
6400                              layout_a, layout_b, satf, rnd, b1op>;
6401              }
6402            } // b1op
6403          } // op
6404        } // rnd
6405      } // satf
6406    } // layout_b
6407  } // layout_a
6408} // defset
6409
6410// MMA
6411class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6412               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6413               string ALayout, string BLayout, int Satfinite, string b1op>
6414  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6415                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
6416    // Requires does not seem to have effect on Instruction w/o Patterns.
6417    // We set it here anyways and propagate to the Pat<> we construct below.
6418  Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6419  let OutOperandList = FragD.Outs;
6420  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
6421  string TypeList = "." # FragD.ptx_elt_type
6422                    # "." # FragA.ptx_elt_type
6423                    # "." # FragB.ptx_elt_type
6424                    # "." # FragC.ptx_elt_type;
6425  let AsmString = "mma.sync.aligned."
6426                  # FragA.geom
6427                  # "." # ALayout
6428                  # "." # BLayout
6429                  # !if(Satfinite, ".satfinite", "")
6430                  # TypeList
6431                  # b1op # "\n\t\t"
6432                  # FragD.regstring # ",\n\t\t"
6433                  # FragA.regstring # ",\n\t\t"
6434                  # FragB.regstring # ",\n\t\t"
6435                  # FragC.regstring # ";";
6436}
6437
6438defset list<WMMA_INSTR> MMAs  = {
6439  foreach layout_a = ["row", "col"] in {
6440    foreach layout_b = ["row", "col"] in {
6441      foreach satf = [0, 1] in {
6442        foreach op = NVVM_MMA_OPS.all_mma_ops in {
6443          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6444            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
6445              def : MMA<WMMA_REGINFO<op[0], "mma">,
6446                        WMMA_REGINFO<op[1], "mma">,
6447                        WMMA_REGINFO<op[2], "mma">,
6448                        WMMA_REGINFO<op[3], "mma">,
6449                        layout_a, layout_b, satf, b1op>;
6450            }
6451          } // b1op
6452        } // op
6453      } // satf
6454    } // layout_b
6455  } // layout_a
6456} // defset
6457
6458//
6459// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
6460//
6461class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6462               DAGOperand SrcOp>
6463  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6464    Requires<Frag.Predicates> {
6465  // Build PatFrag that only matches particular address space.
6466  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6467                             !cond(!eq(Space, ".shared"): AS_match.shared,
6468                                   true: AS_match.generic)>;
6469  // Build AS-constrained pattern.
6470  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6471
6472  let OutOperandList = Frag.Outs;
6473  let InOperandList = !con(Args, (ins MmaCode:$ptx));
6474  let AsmString = "ldmatrix.sync.aligned."
6475                  # Frag.geom
6476                  # "." # Frag.frag
6477                  # !if(Transposed, ".trans", "")
6478                  # Space
6479                  # "." # Frag.ptx_elt_type
6480                  # " " # Frag.regstring # ", [$src];";
6481}
6482
6483// Create all ldmatrix variants
6484defset list<WMMA_INSTR> LDMATRIXs  = {
6485  foreach transposed = [false, true] in {
6486    foreach space = [".shared", ""] in {
6487      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6488        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6489          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6490            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6491                            addr>;
6492      } // addr
6493    } // space
6494  } // transposed
6495} // defset
6496
6497// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
6498// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
6499// the instruction record.
6500class MMA_PAT<WMMA_INSTR wi>
6501      : Pat<wi.IntrinsicPattern,
6502            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
6503                 (wi ptx.version))>,
6504        Requires<wi.Predicates>;
6505
6506// Build intrinsic->instruction patterns for all MMA instructions.
6507foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6508  def : MMA_PAT<mma>;
6509