xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
// Matches an fpimm node whose value, read through convertToFloat(), compares
// equal to 0.0f (a C++ float comparison, so -0.0f compares equal as well).
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

// Matches an fpimm node whose value, read through convertToFloat(), compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;
18
// Matches an fpimm node whose value, read through convertToDouble(), compares
// equal to 0.0 (a C++ double comparison, so -0.0 compares equal as well).
def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

// Matches an fpimm node whose value, read through convertToDouble(), compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;
28
// C++ predicate bodies, one per address space. Each simply returns the result
// of ChkMemSDNodeAddressSpace for node N and the named address space.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
40
// Provides a DAG operand that is replaced with the PTX version in use.
class PTX {
  // Transform producing an i32 immediate that holds
  // Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // The (i32 0) is only a placeholder; PTXVerXform turns it into the PTX
  // version currently being used.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
50
// Builds a list of n sequentially numbered names that share a prefix.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
class RegSeq<int n, string prefix> {
  // Recursive construction: the list for n is the list for n-1 followed by
  // prefix # (n-1); n == 0 ends the recursion with an empty list.
  list<string> ret = !if(!eq(n, 0),
                         [],
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]));
}
58
// Values the shuffle generator iterates over for threadmask_imm: both 0 and 1
// when sync is set, only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync,
                      [0, 1],
                      [0]);
}
62
63//-----------------------------------
64// Synchronization and shuffle functions
65//-----------------------------------
66let isConvergent = true in {
// bar.sync with the literal barrier operand 0.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// bar.sync with a single register operand.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// bar.sync with two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// Barrier-0 reductions. Each one expands to a braced PTX sequence that first
// turns the i32 $pred into a predicate register with setp.ne.u32 and then
// issues the bar.red instruction on barrier 0.

// Result of bar.red.popc.u32 is written straight into $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_popc Int32Regs:$pred))]>;

// bar.red.and.pred yields a predicate; selp.u32 converts it to 1 or 0 in $dst.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_and Int32Regs:$pred))]>;

// bar.red.or.pred yields a predicate; selp.u32 converts it to 1 or 0 in $dst.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst,
                      (int_nvvm_barrier0_or Int32Regs:$pred))]>;
101
// bar.sync with an immediate operand.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, immediate (_I) and register (_R) operand forms; both are
// gated on hasPTX60 and hasSM30.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync with one operand, immediate (_I) and register (_R) forms; both
// are gated on hasPTX60 and hasSM30.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
118
// barrier.sync with $id and $cnt operands. The two-letter suffix gives the
// operand kinds in order ($id, then $cnt): R = Int32Regs, I = i32imm.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
135
// One shfl / shfl.sync instruction variant.
//   sync           - emit the "sync." form and take a $threadmask operand.
//   mode           - shuffle mode string spliced into the intrinsic name and
//                    the asm string.
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - also produce an Int1Regs $pred result (intrinsic name
//                    gets a trailing "p").
//   offset_imm, mask_imm, threadmask_imm
//                  - take the respective operand as i32imm instead of
//                    Int32Regs.
// The base NVPTXInst is instantiated with placeholders; operand lists, asm
// string and pattern are all overridden below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
    : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // int_nvvm_shfl_[sync_]<mode>_<reg>[p]
  string IntrName = "int_nvvm_shfl_" # !if(sync, "sync_", "") # mode # "_"
                    # reg # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs in order: [$threadmask (sync only)], $src, $offset, $mask.
  let InOperandList = !con(
      !if(sync,
          !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
          (ins)),
      (ins rc:$src),
      !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
      !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // shfl.[sync.]<mode>.b32 $dst[|$pred], $src, $offset, $mask[, $threadmask];
  let AsmString = "shfl." # !if(sync, "sync.", "") # mode # ".b32\t"
                  # "$dst" # !if(return_pred, "|$pred", "") # ", "
                  # "$src, $offset, $mask"
                  # !if(sync, ", $threadmask", "") # ";";

  // The pattern is derived from the operand lists: `outs` becomes `set`,
  // `ins` becomes the intrinsic, and every `i32imm` becomes `imm`.
  let Pattern = [!con(
      !foreach(op, OutOperandList,
               !subst(outs, set, !subst(i32imm, imm, op))),
      (set !foreach(op, InOperandList,
                    !subst(ins, Intr, !subst(i32imm, imm, op)))))];
}
175
// Instantiate every SHFL_INSTR variant: sync/non-sync x mode x register type
// x predicate result x immediate-or-register for each of offset, mask and
// (sync only, via THREADMASK_INFO) threadmask.
//
// Predicates: the non-sync forms need hasSM30 and hasSHFL. The sync forms emit
// shfl.sync, so they are additionally gated on hasPTX60, matching the other
// .sync instructions in this file (bar.warp.sync, barrier.sync, vote.sync,
// match.*.sync).
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM30, hasPTX60],
                                 [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}
193
// vote.{all,any,uni,ballot}
// One anonymous instruction per instantiation: "vote.<mode> $dest, $pred",
// selected from IntOp applied to an Int1Regs predicate.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

206
// vote.sync.{all,any,uni,ballot}
// Two instructions per instantiation, differing only in how $mask is taken:
// `i` uses an i32imm, `r` uses an Int32Regs register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL
    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY
    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI
    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT
    : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
223
// match.any.sync.<ptxtype> $dest, $value, $mask
// Four instructions per instantiation. In the two-letter suffix the first
// letter is the $mask kind and the second the $value kind:
// i = immediate (i32imm for the mask, ImmOp for the value), r = register.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
248
// match.all.sync.<ptxtype> $dest|$pred, $value, $mask
// Same four operand-kind variants as a mask/value pair can take (first suffix
// letter: $mask, second: $value; i = immediate, r = register), but every
// variant produces two results: $dest and an Int1Regs $pred.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
276
// redux.sync.<BinOp>.<PTXType> $dst, $src, $mask
// All operands are Int32Regs; gated on hasPTX70 and hasSM80.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
292
293} // isConvergent = true
294
295//-----------------------------------
296// Explicit Memory Fence Functions
297//-----------------------------------
// An operand-less instruction that prints StrOp verbatim and is selected from
// the operand-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
305
306
307//-----------------------------------
308// Async Copy Functions
309//-----------------------------------
310
// cp.async.mbarrier.arrive<NoInc><AddrSpace>.b64 [$addr]
// _32 takes the address in an Int32Regs register, _64 in an Int64Regs one.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
330
// cp.async.ca.shared.global [$dst], [$src], <cpsize>
// cpsize is spliced into the asm string as text. _32 and _64 differ only in
// the register class used for both address operands.
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
350
// cp.async.cg.shared.global [$dst], [$src], <cpsize>
// cpsize is spliced into the asm string as text. _32 and _64 differ only in
// the register class used for both address operands.
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
364
// cp.async.commit_group: no operands.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX70, hasSM80]>;

// cp.async.wait_group $n: $n is matched as an i32 timm.
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX70, hasSM80]>;

// cp.async.wait_all: no operands.
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX70, hasSM80]>;
378
379//-----------------------------------
380// MBarrier Functions
381//-----------------------------------
382
// mbarrier.init<AddrSpace>.b64 [$addr], $count
// _32 / _64 select the register class of $addr; $count is always Int32Regs.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT
    : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
397
// mbarrier.inval<AddrSpace>.b64 [$addr]
// _32 / _64 select the register class of $addr.
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL
    : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;
412
// mbarrier.arrive<AddrSpace>.b64 $state, [$addr]
// Produces an Int64Regs $state; _32 / _64 select the register class of $addr.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE
    : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
427
// mbarrier.arrive.noComplete<AddrSpace>.b64 $state, [$addr], $count
// Produces an Int64Regs $state; _32 / _64 select the register class of $addr,
// $count is always Int32Regs.
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;
447
// mbarrier.arrive_drop<AddrSpace>.b64 $state, [$addr]
// Produces an Int64Regs $state; _32 / _64 select the register class of $addr.
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
465
// mbarrier.arrive_drop.noComplete<AddrSpace>.b64 $state, [$addr], $count
// Produces an Int64Regs $state; _32 / _64 select the register class of $addr,
// $count is always Int32Regs.
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace,
                                           Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          "", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
486
// mbarrier.test_wait<AddrSpace>.b64 $res, [$addr], $state
// Produces an Int1Regs $res from an address and an Int64Regs $state;
// _32 / _64 select the register class of $addr.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
502
// mbarrier.pending_count.b64 $res, $state
// Maps an Int64Regs $state to an Int32Regs $res via the given intrinsic.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
511
512//-----------------------------------
513// Math Functions
514//-----------------------------------
515
// Map min(1.0, max(0.0, x)) to sat(x), emitted as a CVT with CvtSAT.
// The opposite nesting, max(0.0, min(x, 1.0)), cannot be mapped to sat(x):
// when x is NaN it evaluates to 1.0, while sat(x) is 0.
// Same story for fmax, fmin.
//
// Four patterns per type cover both operand orders of the outer fmin and of
// the inner fmax.

// f32
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
547
548
// One-source instruction selected from a one-argument intrinsic.
// OpcStr is the complete asm string (not just a mnemonic) because some users,
// such as INT_PTX_RECIP, need full control over it.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;
556
// Two-source instruction selected from a two-argument intrinsic.
// OpcStr is the complete asm string (not just a mnemonic) because some users,
// such as INT_PTX_NATIVE_POWR_F, need full control over it.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;
565
// Three-source instruction selected from a three-argument intrinsic.
// OpcStr is used as the complete asm string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;
574
575//
576// MISC
577//
578
// prmt.b32 with three Int32Regs sources, selected from int_nvvm_prmt.
def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
581
582//
583// Min Max
584//
585
// f32 min, plain and .ftz.
def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

// f32 max, plain and .ftz.
def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min and max.
def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;
600
601
602//
603// Multiplication
604//
605
// mul.hi on 32-bit signed / unsigned integers.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit signed / unsigned integers.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 mul with an explicit rounding modifier (rn, rz, rm, rp), each with and
// without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 mul with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, signed / unsigned.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
646
647//
648// Div
649//
650
651def INT_NVVM_DIV_APPROX_FTZ_F
652  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
653    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
654def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
655  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
656
// f32 divide intrinsics: for each rounding suffix (.rn, .rz, .rm, .rp) there
// is an .ftz record followed by the plain record.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_div_rp_f>;
673
// f64 divide intrinsics, one record per rounding suffix (.rn, .rz, .rm, .rp).
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_div_rp_d>;
682
683//
684// Sad
685//
686
// Three-source "sad" in .s32 and .u32 spellings, built from F_MATH_3
// (class declared outside this excerpt); every operand is Int32Regs.
def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs,
             int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs,
             int_nvvm_sad_ui>;
691
692//
693// Floor  Ceil
694//
695
// floor: rewritten to a same-type CVT carrying the CvtRMI / CvtRMI_FTZ mode
// operand (mode records are declared outside this excerpt).
def : Pat<
  (int_nvvm_floor_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_floor_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_floor_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil: same shape as floor, using the CvtRPI / CvtRPI_FTZ mode operand.
def : Pat<
  (int_nvvm_ceil_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_ceil_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<
  (int_nvvm_ceil_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
709
710//
711// Abs
712//
713
// Single-source "abs" records built from F_MATH_1 (class declared outside
// this excerpt): f32 with .ftz, plain f32, and f64.
def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_fabs_d>;
721
722//
723// Round
724//
725
// round: rewritten to a same-type CVT with the CvtRNI / CvtRNI_FTZ mode operand.
def : Pat<
  (int_nvvm_round_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_round_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_round_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
732
733//
734// Trunc
735//
736
// trunc: rewritten to a same-type CVT with the CvtRZI / CvtRZI_FTZ mode operand.
def : Pat<
  (int_nvvm_trunc_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_trunc_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_trunc_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
743
744//
745// Saturate
746//
747
// saturate: rewritten to a same-type CVT with the CvtSAT / CvtSAT_FTZ mode operand.
def : Pat<
  (int_nvvm_saturate_ftz_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<
  (int_nvvm_saturate_f Float32Regs:$a),
  (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<
  (int_nvvm_saturate_d Float64Regs:$a),
  (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
754
755//
756// Exp2  Log2
757//
758
// ex2.approx: f32 with .ftz, plain f32, and f64.
def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_ex2_approx_d>;

// lg2.approx: f32 with .ftz, plain f32, and f64.
def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_lg2_approx_d>;
772
773//
774// Sin  Cos
775//
776
// sin.approx on f32, with and without .ftz.
def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sin_approx_f>;

// cos.approx on f32, with and without .ftz.
def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_cos_approx_f>;
786
787//
788// Fma
789//
790
// f32 fused multiply-add intrinsics: for each rounding suffix (.rn, .rz, .rm,
// .rp) an .ftz record is followed by the plain record. All use F_MATH_3 with
// Float32Regs for the result and the three sources.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;
811
// f64 fused multiply-add intrinsics, one record per rounding suffix.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;
820
821//
822// Rcp
823//
824
// f32 "rcp" intrinsics: for each rounding suffix (.rn, .rz, .rm, .rp) an
// .ftz record is followed by the plain record.
def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rcp_rp_f>;
841
// f64 "rcp" intrinsics, one record per rounding suffix.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rcp_rp_d>;

// The only approximate f64 reciprocal in this group; its asm text carries .ftz.
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rcp_approx_ftz_d>;
853
854//
855// Sqrt
856//
857
// f32 "sqrt" intrinsics: .ftz/plain pairs for each rounding suffix (.rn, .rz,
// .rm, .rp), followed by the .approx pair.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_sqrt_approx_f>;
878
// f64 "sqrt" intrinsics, one record per rounding suffix.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_sqrt_rp_d>;
887
// Generic int_nvvm_sqrt_f intrinsic: four patterns over the same source dag,
// told apart only by their Requires<> lists (doF32FTZ and do_SQRTF32_RN,
// both declared outside this excerpt). The last pattern has no predicate.
// The original ordering is kept as-is.
def : Pat<
        (int_nvvm_sqrt_f Float32Regs:$a),
        (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<
        (int_nvvm_sqrt_f Float32Regs:$a),
        (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<
        (int_nvvm_sqrt_f Float32Regs:$a),
        (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<
        (int_nvvm_sqrt_f Float32Regs:$a),
        (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
897
898//
899// Rsqrt
900//
901
// rsqrt.approx: f32 with .ftz, plain f32, and f64.
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs,
             int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs,
             int_nvvm_rsqrt_approx_d>;
909
910//
911// Add
912//
913
// f32 add intrinsics: for each rounding suffix (.rn, .rz, .rm, .rp) an .ftz
// record is followed by the plain record.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_add_rp_f>;
930
// f64 add intrinsics, one record per rounding suffix.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_add_rp_d>;
939
940//
941// Convert
942//
943
// double -> float: each int_nvvm_d2f_* intrinsic becomes CVT_f32_f64 with the
// like-named Cvt* mode operand (an _FTZ mode for the *_ftz intrinsics).
def : Pat<
  (int_nvvm_d2f_rn_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rn Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_d2f_rz_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_d2f_rm_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rm Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_d2f_rp_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rp Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
960
// double -> signed 32-bit integer, via CVT_s32_f64 with a Cvt*I mode operand.
def : Pat<
  (int_nvvm_d2i_rn Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2i_rz Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2i_rm Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2i_rp Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// double -> unsigned 32-bit integer, via CVT_u32_f64.
def : Pat<
  (int_nvvm_d2ui_rn Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ui_rz Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ui_rm Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ui_rp Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
978
// signed 32-bit integer -> double, via CVT_f64_s32.
def : Pat<
  (int_nvvm_i2d_rn Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2d_rz Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2d_rm Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2d_rp Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// unsigned 32-bit integer -> double, via CVT_f64_u32.
def : Pat<
  (int_nvvm_ui2d_rn Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2d_rz Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2d_rm Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2d_rp Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
996
// float -> signed 32-bit integer, via CVT_s32_f32. The *_ftz intrinsics use
// the _FTZ flavour of the same Cvt*I mode operand.
def : Pat<
  (int_nvvm_f2i_rn_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rn Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2i_rz_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2i_rm_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rm Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2i_rp_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rp Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// float -> unsigned 32-bit integer, via CVT_u32_f32.
def : Pat<
  (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rn Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rm Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rp Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
1030
// signed 32-bit integer -> float, via CVT_f32_s32.
def : Pat<
  (int_nvvm_i2f_rn Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2f_rz Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2f_rm Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2f_rp Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// unsigned 32-bit integer -> float, via CVT_f32_u32.
def : Pat<
  (int_nvvm_ui2f_rn Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2f_rz Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2f_rm Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ui2f_rp Int32Regs:$a),
  (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
1048
// Assembles a Float64Regs value from two Int32Regs sources with a single
// mov.b64 whose source is the pair {$src0, $src1}.
def INT_NVVM_LOHI_I2D
  : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
             Float64Regs, Int32Regs, Int32Regs,
             int_nvvm_lohi_i2d>;
1051
// Extracts one 32-bit half of a Float64Regs value. The asm body declares a
// scratch .b32 register %temp and unpacks the source with mov.b64 into a
// two-element vector; $dst sits in the first slot here.
def INT_NVVM_D2I_LO
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs,
             int_nvvm_d2i_lo>;
// Same unpacking sequence, with $dst in the second slot of the vector.
def INT_NVVM_D2I_HI
  : F_MATH_1<"{{\n\t" #
             ".reg .b32 %temp; \n\t" #
             "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
             "}}",
             Int32Regs, Float64Regs,
             int_nvvm_d2i_hi>;
1064
// float -> signed 64-bit integer, via CVT_s64_f32. The *_ftz intrinsics use
// the _FTZ flavour of the same Cvt*I mode operand.
def : Pat<
  (int_nvvm_f2ll_rn_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rn Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ll_rz_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ll_rm_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rm Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ll_rp_ftz Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ll_rp Float32Regs:$a),
  (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// float -> unsigned 64-bit integer, via CVT_u64_f32.
def : Pat<
  (int_nvvm_f2ull_rn_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rn Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ull_rz_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ull_rm_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rm Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ull_rp_ftz Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ull_rp Float32Regs:$a),
  (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
1098
// double -> signed 64-bit integer, via CVT_s64_f64.
def : Pat<
  (int_nvvm_d2ll_rn Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ll_rz Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ll_rm Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ll_rp Float64Regs:$a),
  (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// double -> unsigned 64-bit integer, via CVT_u64_f64.
def : Pat<
  (int_nvvm_d2ull_rn Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ull_rz Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ull_rm Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ull_rp Float64Regs:$a),
  (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
1116
// signed 64-bit integer -> float, via CVT_f32_s64.
def : Pat<
  (int_nvvm_ll2f_rn Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2f_rz Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2f_rm Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2f_rp Int64Regs:$a),
  (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// unsigned 64-bit integer -> float, via CVT_f32_u64.
def : Pat<
  (int_nvvm_ull2f_rn Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2f_rz Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2f_rm Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2f_rp Int64Regs:$a),
  (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
1134
// signed 64-bit integer -> double, via CVT_f64_s64.
def : Pat<
  (int_nvvm_ll2d_rn Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ll2d_rz Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ll2d_rm Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ll2d_rp Int64Regs:$a),
  (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// unsigned 64-bit integer -> double, via CVT_f64_u64.
def : Pat<
  (int_nvvm_ull2d_rn Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ull2d_rz Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ull2d_rm Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_ull2d_rp Int64Regs:$a),
  (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
1152
1153
// float -> half: CVT_f16_f32 performs the conversion and its result is then
// fed through BITCONVERT_16_F2I (declared outside this excerpt).
def : Pat<
  (int_nvvm_f2h_rn_ftz Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<
  (int_nvvm_f2h_rn Float32Regs:$a),
  (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
1158
1159//
1160// Bitcast
1161//
1162
// 32-bit bitcasts: a mov.b32 between Float32Regs and Int32Regs, one record
// per direction.
def INT_NVVM_BITCAST_F2I
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Int32Regs, Float32Regs,
             int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
  : F_MATH_1<"mov.b32 \t$dst, $src0;",
             Float32Regs, Int32Regs,
             int_nvvm_bitcast_i2f>;

// 64-bit bitcasts: a mov.b64 between Float64Regs and Int64Regs.
def INT_NVVM_BITCAST_LL2D
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Float64Regs, Int64Regs,
             int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
  : F_MATH_1<"mov.b64 \t$dst, $src0;",
             Int64Regs, Float64Regs,
             int_nvvm_bitcast_d2ll>;
1172
1173//
1174// FNS
1175//
1176
// Base class for the "fns.b32" instruction variants.
//   ins      - input operand dag; must name $mask, $base and $offset because
//              the asm string refers to them.
//   Operands - dag whose result is assigned to $dst in the selection pattern.
// Every instance is gated on the hasPTX60 and hasSM30 predicates.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<
      (outs Int32Regs:$dst),
      ins,
      "fns.b32 \t$dst, $mask, $base, $offset;",
      [(set Int32Regs:$dst, Operands)]>,
    Requires<[hasPTX60, hasSM30]>;
1182
// All eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives, in mask/base/offset order, whether each operand is a register
// (r, Int32Regs) or an immediate (i, i32imm matched with `imm`).
def INT_FNS_rrr
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
  : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
  : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
  : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
  : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;
1199
1200//-----------------------------------
1201// Atomic Functions
1202//-----------------------------------
1203
// PatFrag wrappers that attach one of the AS_match predicates (defined near
// the top of this file) to an atomic fragment: AS_match.global,
// AS_match.shared and AS_match.generic respectively.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.global>;

class ATOMIC_SHARED_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.shared>;

class ATOMIC_GENERIC_CHK<dag ops, dag frag>
  : PatFrag<ops, frag, AS_match.generic>;
1210
// Two-operand atomic (address + value) for one pointer register class.
//   ptrclass - register class of $addr.
//   regclass - register class of $dst and of the register form of $b.
//   SpaceStr, OpcStr, TypeStr - pasted after "atom", in that order, to form
//              the mnemonic.
//   IntOp    - fragment matched by both records.
//   IMMType / IMM - operand type and dag node used for the immediate form.
//   Pred     - predicate list handed to Requires<>.
// Produces `reg` (register $b) and `imm` (immediate $b); both print the same
// asm text.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  defvar AtomAsm =
    "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;";

  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                AtomAsm,
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;

  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                AtomAsm,
                [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice: `p32` with Int32Regs as the pointer
// class and `p64` with Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
  defm p64
    : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, IMM, Pred>;
}
1231
// Two-operand atomic whose second operand is negated first. The asm body
// opens a scope, declares a signed scratch register `temp` of width TypeStr,
// writes neg($b) into it, and then issues the atom instruction on the .u
// type with `temp` as the value operand. Only a register form (`reg`) exists.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "{{ \n\t" #
                ".reg \t.s" # TypeStr # " temp; \n\t" #
                "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
                  " \t$dst, [$addr], temp; \n\t" #
                "}}",
                [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp for both pointer widths: `p32` uses
// Int32Regs and `p64` uses Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                         IntOp, Pred>;
}
1253
// Three-operand atomic (address + two values) for one pointer register class.
// All four records print the same asm text and differ only in which of
// $b / $c are immediates:
//   reg  - both registers        imm1 - $b immediate
//   imm2 - $c immediate          imm3 - both immediates
// Immediate operands use IMMType in the operand list and are always matched
// with the `imm` node.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  defvar AtomAsm =
    "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;";

  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                AtomAsm,
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                AtomAsm,
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                AtomAsm,
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                AtomAsm,
                [(set regclass:$dst,
                      (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
      Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp for both pointer widths: `p32` uses Int32Regs
// and `p64` uses Int64Regs. Pred defaults to the empty list.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                     IntOp, IMMType, Pred>;
}
1289
1290// atom_add
1291
// Address-space-qualified fragments for atomic add. Suffixes _g / _s / _gen
// select the global / shared / generic check class.

// 32-bit integer add.
def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;

// 64-bit integer add.
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;

// Unsized names: these wrap atomic_load_fadd rather than an integer add.
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;
1310
// Integer atomic add, 32-bit. The *_GEN_32_USE_G variant matches the generic
// fragment but prints the ".global" space string.
defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

// Integer atomic add, 64-bit; same four variants.
defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
1328
// Floating-point atomic add, f32. Immediates use f32imm / fpimm.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// Floating-point atomic add, f64. Additionally gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
1342
1343// atom_sub
1344
// atomic_load_sub_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_sub_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_sub_32 node:$addr, node:$val)>;
def atomic_load_sub_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_sub_32 node:$addr, node:$val)>;
def atomic_load_sub_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_sub_32 node:$addr, node:$val)>;
def atomic_load_sub_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_sub_64 node:$addr, node:$val)>;
def atomic_load_sub_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_sub_64 node:$addr, node:$val)>;
def atomic_load_sub_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_sub_64 node:$addr, node:$val)>;
1357
// Atomic subtract instruction families.  They are built from F_ATOMIC_2_NEG
// and pass ".add" as the operation string.
// NOTE(review): presumably the operand is negated and atom.add is emitted;
// confirm against F_ATOMIC_2_NEG, which is defined outside this section.
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen>;
1374
1375// atom_swap
1376
// atomic_swap_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_swap_32 node:$addr, node:$val)>;
def atomic_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_swap_32 node:$addr, node:$val)>;
def atomic_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_swap_32 node:$addr, node:$val)>;
def atomic_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_swap_64 node:$addr, node:$val)>;
def atomic_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_swap_64 node:$addr, node:$val)>;
def atomic_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_swap_64 node:$addr, node:$val)>;
1389
// atom.exch instruction families for 32- and 64-bit values (.b32 / .b64).
defm INT_PTX_ATOM_SWAP_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                 atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                 atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
1406
1407// atom_max
1408
// atomic_load_max_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_max_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_max_32 node:$addr, node:$val)>;
def atomic_load_max_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_max_32 node:$addr, node:$val)>;
def atomic_load_max_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_max_32 node:$addr, node:$val)>;
def atomic_load_max_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_max_64 node:$addr, node:$val)>;
def atomic_load_max_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_max_64 node:$addr, node:$val)>;
def atomic_load_max_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_max_64 node:$addr, node:$val)>;

// The same wrappers for atomic_load_umax_{32,64}.
def atomic_load_umax_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umax_32 node:$addr, node:$val)>;
def atomic_load_umax_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umax_32 node:$addr, node:$val)>;
def atomic_load_umax_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_umax_32 node:$addr, node:$val)>;
def atomic_load_umax_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umax_64 node:$addr, node:$val)>;
def atomic_load_umax_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umax_64 node:$addr, node:$val)>;
def atomic_load_umax_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_umax_64 node:$addr, node:$val)>;
1433
// atom.max instruction families.  The atomic_load_max_* fragments use the
// signed PTX types (.s32/.s64); the atomic_load_umax_* fragments use the
// unsigned ones (.u32/.u64).
defm INT_PTX_ATOM_LOAD_MAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                 atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                 atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                 atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                 atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
1466
1467// atom_min
1468
// atomic_load_min_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_min_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_min_32 node:$addr, node:$val)>;
def atomic_load_min_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_min_32 node:$addr, node:$val)>;
def atomic_load_min_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_min_32 node:$addr, node:$val)>;
def atomic_load_min_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_min_64 node:$addr, node:$val)>;
def atomic_load_min_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_min_64 node:$addr, node:$val)>;
def atomic_load_min_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_min_64 node:$addr, node:$val)>;

// The same wrappers for atomic_load_umin_{32,64}.
def atomic_load_umin_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umin_32 node:$addr, node:$val)>;
def atomic_load_umin_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umin_32 node:$addr, node:$val)>;
def atomic_load_umin_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_umin_32 node:$addr, node:$val)>;
def atomic_load_umin_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umin_64 node:$addr, node:$val)>;
def atomic_load_umin_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_umin_64 node:$addr, node:$val)>;
def atomic_load_umin_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_umin_64 node:$addr, node:$val)>;
1493
// atom.min instruction families.  The atomic_load_min_* fragments use the
// signed PTX types (.s32/.s64); the atomic_load_umin_* fragments use the
// unsigned ones (.u32/.u64).
defm INT_PTX_ATOM_LOAD_MIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                 atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                 atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                 atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
                 atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;
1526
1527// atom_inc  atom_dec
1528
// The int_nvvm_atomic_load_{inc,dec}_32 intrinsics wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_inc_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (int_nvvm_atomic_load_inc_32 node:$addr, node:$val)>;
def atomic_load_inc_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (int_nvvm_atomic_load_inc_32 node:$addr, node:$val)>;
def atomic_load_inc_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (int_nvvm_atomic_load_inc_32 node:$addr, node:$val)>;
def atomic_load_dec_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (int_nvvm_atomic_load_dec_32 node:$addr, node:$val)>;
def atomic_load_dec_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (int_nvvm_atomic_load_dec_32 node:$addr, node:$val)>;
def atomic_load_dec_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (int_nvvm_atomic_load_dec_32 node:$addr, node:$val)>;
1541
// atom.inc and atom.dec instruction families; only the 32-bit (.u32) form is
// defined here.
defm INT_PTX_ATOM_INC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
                 atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
                 atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_gen, i32imm, imm>;
1558
1559// atom_and
1560
// atomic_load_and_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_and_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_and_32 node:$addr, node:$val)>;
def atomic_load_and_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_and_32 node:$addr, node:$val)>;
def atomic_load_and_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_and_32 node:$addr, node:$val)>;
def atomic_load_and_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_and_64 node:$addr, node:$val)>;
def atomic_load_and_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_and_64 node:$addr, node:$val)>;
def atomic_load_and_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_and_64 node:$addr, node:$val)>;
1573
// atom.and instruction families for 32- and 64-bit values (.b32 / .b64).
defm INT_PTX_ATOM_AND_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
                 atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
                 atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
                 atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
                 atomic_load_and_64_gen, i64imm, imm>;
1590
1591// atom_or
1592
// atomic_load_or_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_or_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_or_32 node:$addr, node:$val)>;
def atomic_load_or_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_or_32 node:$addr, node:$val)>;
def atomic_load_or_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_or_32 node:$addr, node:$val)>;
def atomic_load_or_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_or_64 node:$addr, node:$val)>;
def atomic_load_or_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_or_64 node:$addr, node:$val)>;
def atomic_load_or_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_or_64 node:$addr, node:$val)>;
1605
// atom.or instruction families for 32- and 64-bit values (.b32 / .b64).
defm INT_PTX_ATOM_OR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
                 atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
                 atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
                 atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
                 atomic_load_or_64_s, i64imm, imm>;
1622
1623// atom_xor
1624
// atomic_load_xor_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_load_xor_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_xor_32 node:$addr, node:$val)>;
def atomic_load_xor_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_xor_32 node:$addr, node:$val)>;
def atomic_load_xor_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_xor_32 node:$addr, node:$val)>;
def atomic_load_xor_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$val),
                        (atomic_load_xor_64 node:$addr, node:$val)>;
def atomic_load_xor_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$val),
                        (atomic_load_xor_64 node:$addr, node:$val)>;
def atomic_load_xor_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$val),
                         (atomic_load_xor_64 node:$addr, node:$val)>;
1637
// atom.xor instruction families for 32- and 64-bit values (.b32 / .b64).
defm INT_PTX_ATOM_XOR_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
                 atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
                 atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
                 atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
                 atomic_load_xor_64_gen, i64imm, imm>;
1654
1655// atom_cas
1656
// Three-operand fragments: atomic_cmp_swap_{32,64} wrapped in each of the
// ATOMIC_{GLOBAL,SHARED,GENERIC}_CHK fragment classes.
def atomic_cmp_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$cmp, node:$new),
                        (atomic_cmp_swap_32 node:$addr, node:$cmp, node:$new)>;
def atomic_cmp_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$cmp, node:$new),
                        (atomic_cmp_swap_32 node:$addr, node:$cmp, node:$new)>;
def atomic_cmp_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$cmp, node:$new),
                         (atomic_cmp_swap_32 node:$addr, node:$cmp, node:$new)>;
def atomic_cmp_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$addr, node:$cmp, node:$new),
                        (atomic_cmp_swap_64 node:$addr, node:$cmp, node:$new)>;
def atomic_cmp_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$addr, node:$cmp, node:$new),
                        (atomic_cmp_swap_64 node:$addr, node:$cmp, node:$new)>;
def atomic_cmp_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$addr, node:$cmp, node:$new),
                         (atomic_cmp_swap_64 node:$addr, node:$cmp, node:$new)>;
1669
// atom.cas instruction families, built from the three-operand F_ATOMIC_3
// (defined outside this section) for 32- and 64-bit values (.b32 / .b64).
defm INT_PTX_ATOM_CAS_G_32
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
    : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
                 atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
    : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
    : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
    : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
                 atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
    : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
    : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_64_gen, i64imm>;
1686
1687// Support for scoped atomic operations.  Matches
1688// int_nvvm_atomic_{op}_{space}_{type}_{scope}
1689// and converts it into the appropriate instruction.
1690// NOTE: not all possible combinations are implemented
1691//  'space' is limited to generic as it's the only one needed to support CUDA.
1692//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Common base for the scoped two- and three-operand atomics.  The instruction
// has a single $result output in `regclass`, takes `ins` as its input operand
// list, prints `AsmStr`, and is selected when `Operands` matches; it is
// predicated on `Preds`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
    : NVPTXInst<(outs regclass:$result),
                ins,
                AsmStr,
                [(set regclass:$result, Operands)]>,
      Requires<Preds>;
1699
1700// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Each form is emitted twice, for a 32-bit and then a 64-bit pointer
  // register holding $src.

  // $b in a register; these patterns carry AddedComplexity = 1.
  let AddedComplexity = 1 in {
    foreach ptrclass = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptrclass:$src, regclass:$b),
                        (Intr ptrclass:$src, regclass:$b)>;
  }

  // $b as an immediate.  TableGen cannot infer argument types from an
  // Intrinsic (it can from an Instruction), so the immediate's type is
  // pinned with an explicit cast to ImmTy.
  foreach ptrclass = [Int32Regs, Int64Regs] in
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins ptrclass:$src, ImmType:$b),
                      (Intr ptrclass:$src, (ImmTy Imm:$b))>;
}
1723
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // All register/immediate combinations of $b and $c.  Each form is emitted
  // for a 32-bit and then a 64-bit pointer register holding $src.

  // Both $b and $c in registers (AddedComplexity = 2).
  let AddedComplexity = 2 in {
    foreach ptrclass = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptrclass:$src, regclass:$b, regclass:$c),
                        (Intr ptrclass:$src, regclass:$b, regclass:$c)>;
  }

  // Exactly one of $b / $c is an immediate (AddedComplexity = 1).
  let AddedComplexity = 1 in {
    foreach ptrclass = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptrclass:$src, ImmType:$b, regclass:$c),
                        (Intr ptrclass:$src, (ImmTy Imm:$b), regclass:$c)>;
    foreach ptrclass = [Int32Regs, Int64Regs] in
      def : ATOM23_impl<AsmStr, regclass, Preds,
                        (ins ptrclass:$src, regclass:$b, ImmType:$c),
                        (Intr ptrclass:$src, regclass:$b, (ImmTy Imm:$c))>;
  }

  // Both $b and $c are immediates (no added complexity).
  foreach ptrclass = [Int32Regs, Int64Regs] in
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins ptrclass:$src, ImmType:$b, ImmType:$c),
                      (Intr ptrclass:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}
1758
// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Asm text: "atom[.space][.scope].op.type".  The space is dropped for
  // "gen" and the scope is dropped for "gpu".
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar Asm = "atom" # SpaceSuffix # ScopeSuffix
               # "." # OpStr # "." # TypeStr
               # " \t$result, [$src], $b;";

  // Intrinsic record name: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>];
  // the scope part is omitted only when ScopeStr is empty.
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM2P_impl<Asm, !cast<Intrinsic>(IntrName),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of the two-operand name builder: assembles the
// asm string and the intrinsic record name, then instantiates ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Asm text: "atom[.space][.scope].op.type".  The space is dropped for
  // "gen" and the scope is dropped for "gpu".
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar Asm = "atom" # SpaceSuffix # ScopeSuffix
               # "." # OpStr # "." # TypeStr
               # " \t$result, [$src], $b, $c;";

  // Intrinsic record name: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>];
  // the scope part is omitted only when ScopeStr is empty.
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);

  defm : ATOM3P_impl<Asm, !cast<Intrinsic>(IntrName),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
1788
1789// Constructs variants for different address spaces.
1790// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Only the "gen" space string is instantiated for two-operand atomics.
  defm _gen_
      : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                    regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  // Only the "gen" space string is instantiated for three-operand atomics.
  defm _gen_
      : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                    regclass, ImmType, Imm, ImmTy, Preds>;
}
1803
1804// Constructs variants for different scopes of atomic op.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Every scoped variant requires hasAtomScope in addition to the caller's
  // predicates.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // The .gpu scope is the default and is currently covered by the existing
  // atomics that do not name a scope, so only "cta" and "sys" are emitted.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
  // Every scoped variant requires hasAtomScope in addition to the caller's
  // predicates.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // ".gpu"-scoped atomics are not defined: they do the same thing as the
  // regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
1829
1830// atom.add
// Scoped atom.add: s32/u32/u64 integer forms (intrinsic type tag "i") and
// f32/f64 floating-point forms (tag "f").  Only f64 carries an extra
// predicate, hasAtomAddF64.
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32
      : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
      : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32
      : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
      : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                    [hasAtomAddF64]>;
}
1840
1841// atom.{and,or,xor}
// Scoped bitwise atomics: b32 is unconditional, b64 additionally requires
// hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32
      : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                    [hasAtomBitwise64]>;
}
1847
1848// atom.exch
// Scoped exchange: b32 and b64, neither with extra predicates.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32
      : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1853
1854// atom.{min,max}
// Scoped min/max: signed and unsigned 32-bit forms are unconditional; both
// 64-bit forms additionally require hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32
      : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64
      : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                    [hasAtomMinMax64]>;
  defm _u64
      : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                    [hasAtomMinMax64]>;
}
1863
1864// atom.{inc,dec}
// Scoped inc/dec: only the u32 form is defined.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32
      : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}
1868
1869// atom.cas
// Scoped compare-and-swap: b32 and b64 via the three-operand ATOM3S_impl.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32
      : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
      : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}
1874
// Instantiate the scoped atomics, one family per operation.  The string
// argument is used both in the intrinsic name and as the PTX operation
// suffix.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
1885
1886//-----------------------------------
1887// Support for ldu on sm_20 or later
1888//-----------------------------------
1889
1890// Don't annotate ldu instructions as mayLoad, as they load from memory that is
1891// read-only in a kernel.
1892
1893// Scalar
1894
// Scalar ldu.global, one def per address operand form: 32/64-bit register
// (areg, areg64), imemAny (avar) and MEMri / MEMri64 (ari, ari64).  The defs
// carry no selection patterns and all require hasLDU.  TyStr supplies the
// type suffix together with the operand text.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ldu.global." # TyStr;

  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
}
1912
// Scalar ldu.global instantiations.  Each passes the PTX type suffix plus
// operand text, and the destination register class.  Note that i8 results
// use Int16Regs, and f16 / f16x2 use the untyped .b16 / .b32 suffixes.
defm INT_PTX_LDU_GLOBAL_i8
    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
    : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
    : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
    : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
    : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
    : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
1923
1924// vector
1925
1926// Elementized vector ldu
// Two-element ldu.global: each def writes $dst1 and $dst2 (both of class
// regclass) and differs only in the operand class used for the $src address.
// TyStr is the assembly text that follows "ldu.global.".  Unlike the scalar
// LDU_G defs, no Requires<> predicate is attached here.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1944
// Four-element ldu.global: each def writes $dst1..$dst4 (all of class
// regclass) and differs only in the operand class used for the $src address.
// TyStr is the assembly text that follows "ldu.global.".
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}
1962
// Two-element vector ldu instantiations.
defm INT_PTX_LDU_G_v2i8_ELE :
  VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE :
  VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE :
  VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE :
  VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE :
  VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE :
  VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE :
  VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE :
  VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu instantiations.
defm INT_PTX_LDU_G_v4i8_ELE :
  VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE :
  VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE :
  VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE :
  VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE :
  VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE :
  VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
1996
1997
1998//-----------------------------------
1999// Support for ldg on sm_35 or later
2000//-----------------------------------
2001
2002// Don't annotate ld.global.nc as mayLoad, because these loads go through the
2003// non-coherent texture cache, and therefore the values read must be read-only
2004// during the lifetime of the kernel.
2005
// Scalar ld.global.nc instructions, one def per form of the $src address
// operand.  TyStr supplies the assembly text that follows "ld.global.nc." and
// regclass is the register class of $result.  No selection patterns are
// attached here, and every variant requires hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  // $src is an Int32Regs register.
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  // $src is an Int64Regs register.
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  // $src is an imemAny operand.
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  // $src is a MEMri operand.
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  // $src is a MEMri64 operand.
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}
2023
// Scalar ld.global.nc instantiations.  i8 and i16 both land in Int16Regs, and
// the p32/p64 entries use the same assembly text and register class as
// i32/i64.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
2044
2045// vector
2046
2047// Elementized vector ldg
// Two-element ld.global.nc: each def writes $dst1 and $dst2 (both of class
// regclass) and differs only in the operand class used for the $src address.
// TyStr is the assembly text that follows "ld.global.nc.".  Unlike the scalar
// LDG_G defs, no Requires<> predicate is attached here.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2065
// Four-element ld.global.nc: each def writes $dst1..$dst4 (all of class
// regclass) and differs only in the operand class used for the $src address.
// TyStr is the assembly text that follows "ld.global.nc.".
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}
2083
2084// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ldg instantiations.
defm INT_PTX_LDG_G_v2i8_ELE :
  VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
  VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
  VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE :
  VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE :
  VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
  VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
  VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
  VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE :
  VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
  VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
  VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE :
  VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE :
  VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
  VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
2113
2114
// cvta.<Str> selection for the intrinsic Intrin.  Three forms are produced:
// 32-bit in / 32-bit out, 64-bit in / 64-bit out, and (only with useShortPtr)
// 32-bit in / 64-bit out, which first widens $src into a scratch .b64
// register.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvt.u64.u32 \t%tmp, $src;\n\t",
                           "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2129
// cvta.to.<Str> selection for the intrinsic Intrin.  Three forms are produced:
// 32-bit in / 32-bit out, 64-bit in / 64-bit out, and (only with useShortPtr)
// 64-bit in / 32-bit out, which converts into a scratch .b64 register and then
// narrows it into $result.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           "  cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}
2144
// <space> -> generic conversions.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

// generic -> <space> conversions.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;
2154
2155
2156// nvvm.ptr.gen.to.param
// int_nvvm_ptr_gen_to_param is selected to a plain register move, in a 32-bit
// and a 64-bit flavor.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
2167
2168
// nvvm.move intrinsic
// Each int_nvvm_move_* intrinsic is selected to a single mov of the matching
// width/type.  int_nvvm_move_ptr has a 32-bit and a 64-bit register form.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
2198
2199// @TODO: Are these actually needed, or will we always just see symbols
2200// copied to registers first?
2201/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
2202                             "mov.u32 \t$r, $s;",
2203                             [(set Int32Regs:$r,
2204                             (int_nvvm_move_ptr texternalsym:$s))]>;
2205def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
2206                             "mov.u64 \t$r, $s;",
2207                             [(set Int64Regs:$r,
2208                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
2209
2210
2211// MoveParam        %r1, param
2212// ptr_local_to_gen %r2, %r1
2213// ptr_gen_to_local %r3, %r2
2214// ->
2215// mov %r1, param
2216
2217// @TODO: Revisit this.  There is a type
2218// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
2219// instructions are not currently defined. However, we can use the ptr
2220// variants and the asm printer will do the right thing.
// Fold gen_to_local(local_to_gen(MoveParam sym)) into a single pointer move
// of the symbol, for 64-bit and 32-bit results respectively.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
2227
// Moves an imem operand into a 64-bit register; no selection pattern is
// attached.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
2231
2232//-----------------------------------
2233// Compiler Error Warn
2234// - Just ignore them in codegen
2235//-----------------------------------
2236
// The warn/error intrinsics only emit a PTX comment; $a is accepted as a
// 32-bit or 64-bit register and does not appear in the assembly text.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;
2249
2250
2251// isspacep
2252
// isspacep.<space>: writes a predicate ($d, Int1Regs) for the address $a,
// which may be a 32-bit or a 64-bit register.  Only the .const forms carry a
// hasPTX31 requirement.
def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  Requires<[hasPTX31]>;

def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;

def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;

def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
2287
2288
2289// Special register reads
// Copies a SpecialRegs operand into a 32-bit register.  It has no pattern of
// its own; it is referenced from standalone Pat<> records.
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;", []>;
2293
// Each int_nvvm_read_ptx_sreg_envreg<N> intrinsic (N = 0..31) is selected to
// MOV_SPECIAL reading the identically numbered ENVREG<N> register.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
2326
2327
2328// rotate builtin support
2329
// 32-bit rotate when hasHWROT32 holds: a wrapping left funnel shift with $src
// supplied as both data operands.  The amount is an immediate (_IMM) or a
// register (_REG).
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
       (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]>;

def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst),
    (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
       (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]>;
2345
// 32-bit rotate when noHWROT32 holds: defer to the *_sw instructions.  The
// immediate form passes both $amt and SUB_FRM_32 applied to $amt.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src,
                       imm:$amt,
                       (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2353
// Extract one 32-bit half of a 64-bit register by unpacking it with mov.b64
// into {$dst, %dummy} (low half) or {%dummy, $dst} (high half); %dummy is a
// scratch register declared inside the emitted block.
let hasSideEffects = false in {
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []>;

  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []>;
}
2369
// Build a 64-bit register from two 32-bit registers, listed as {$lo, $hi} in
// the mov.b64 source vector.
let hasSideEffects = false in {
  def PACK_TWO_INT32 : NVPTXInst<(outs Int64Regs:$dst),
                                 (ins Int32Regs:$lo, Int32Regs:$hi),
                                 "mov.b64 \t$dst, {{$lo, $hi}};",
                                 []>;
}
2375
// swap_lo_hi: repack the source with its high half in the $lo slot and its
// low half in the $hi slot.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32
            (GET_HI_INT64 Int64Regs:$src),
            (GET_LO_INT64 Int64Regs:$src))>;
2379
2380// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
2381// no side effects.
// Pattern-less shf.{l,r}.wrap.b32 instructions taking $lo, $hi and a shift
// amount that is either an immediate (_IMM) or a register (_REG).  All four
// require hasHWROT32.
let hasSideEffects = false in {
  def SHF_L_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
    Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
    Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
    Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG : NVPTXInst<
      (outs Int32Regs:$dst),
      (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
      "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
    Requires<[hasHWROT32]>;
}
2407
2408// HW version of rotate 64
// Rotate left, immediate amount: both result halves are left funnel shifts of
// the source halves, with the operand order swapped between the two.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_L_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, immediate amount: same shape using the right funnel shift,
// with the half order mirrored relative to the rotate-left patterns.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              imm:$amt),
            (SHF_R_WRAP_B32_IMM
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG
              (GET_LO_INT64 Int64Regs:$src),
              (GET_HI_INT64 Int64Regs:$src),
              Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG
              (GET_HI_INT64 Int64Regs:$src),
              (GET_LO_INT64 Int64Regs:$src),
              Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;
2441
// SW version of rotate 64 (selected when noHWROT32 holds).
//
// The immediate forms hand ROT64imm_sw two amounts: $amt and the value
// SUB_FRM_64 derives from $amt.  Rotate-left passes ($amt, SUB_FRM_64 $amt);
// rotate-right passes the same two values in the opposite order.  For a
// 64-bit rotate the derived amount must come from SUB_FRM_64, not SUB_FRM_32,
// which belongs to the 32-bit ROT32imm_sw pattern.
// NOTE(review): ROT64imm_sw and SUB_FRM_64 are defined outside this section;
// presumably SUB_FRM_64 yields (64 - amt) -- confirm against their
// definitions.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
2455
2456
2457//-----------------------------------
2458// Texture Intrinsics
2459//-----------------------------------
2460
2461// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
2462// also defined in NVPTXReplaceImageHandles.cpp
2463
2464// texmode_independent
2465let IsTex = true, IsTexModeUnified = false in {
2466// Texture fetch instructions using handles
2467
// Shared shape of the 1D texture fetches: four outtype results ($r, $g, $b,
// $a) fetched at coordinate $x (of class intype).  texsamp contributes the
// leading $t and $s operands that the assembly string refers to.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];"),
                []>;

// One def per register (R) / 64-bit immediate (I) choice for the $t and $s
// operands, in that order.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2486
// Names follow TEX_1D_<result type>_<coordinate type>.
defm TEX_1D_F32_S32
  : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32
  : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32
  : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32
  : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32
  : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32
  : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2493
// 1D texture fetch with an extra $lod operand (same class as the coordinate
// $x) appended after the bracketed address in the assembly string.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;"),
                []>;

// One def per register (R) / 64-bit immediate (I) choice for the $t and $s
// operands, in that order.
multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}
2513
// tex.level.1d instantiations; only f32 coordinates are provided.
defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2520
// 1D texture fetch with two extra operands, $gradx and $grady (same class as
// the coordinate $x), each printed in its own braces after the address.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],",
                           " \\{$gradx\\}, \\{$grady\\};"),
                []>;

// One def per register (R) / 64-bit immediate (I) choice for the $t and $s
// operands, in that order.
multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}
2541
// tex.grad.1d instantiations; only f32 coordinates are provided.
defm TEX_1D_F32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD :
  TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2548
// Base instruction record for the tex.a1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs) and
//         $x (intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_1D_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$l, $x\\}];",
      []>;
2556
// Emits four TEX_1D_ARRAY_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2568
// tex.a1d instantiations. Each result type (f32, s32, u32) is paired with
// both an f32 (Float32Regs) and an s32 (Int32Regs) intype.
defm TEX_1D_ARRAY_F32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32 :
  TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2581
// Base instruction record for the tex.level.a1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x and $lod (both intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_1D_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
             " $lod;",
      []>;
2590
// Emits four TEX_1D_ARRAY_LEVEL_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2602
// tex.level.a1d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_1D_ARRAY_F32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL :
  TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2609
// Base instruction record for the tex.grad.a1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x, $gradx and $grady (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_1D_ARRAY_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
2619
// Emits four TEX_1D_ARRAY_GRAD_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_1D_ARRAY_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2631
// tex.grad.a1d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_1D_ARRAY_F32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD :
  TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2638
// Base instruction record for the tex.2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x and $y (both intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_2D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$x, $y\\}];",
      []>;
2646
// Emits four TEX_2D_base defs. The two suffix letters give the operand form
// of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2654
// tex.2d instantiations. Each result type (f32, s32, u32) is paired with
// both an f32 (Float32Regs) and an s32 (Int32Regs) intype.
defm TEX_2D_F32_F32
  : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32
  : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32
  : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32
  : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32
  : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32
  : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2661
// Base instruction record for the tex.level.2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y and $lod (all
//         intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_2D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
             " $lod;",
      []>;
2670
// Emits four TEX_2D_LEVEL_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2682
// tex.level.2d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2689
// Base instruction record for the tex.grad.2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y, $gradx0, $gradx1,
//         $grady0 and $grady1 (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_2D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, $s, \\{$x, $y\\}],"
             " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
2700
// Emits four TEX_2D_GRAD_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2712
// tex.grad.2d instantiations: f32 results in Float32Regs, s32/u32 results in
// Int32Regs; intype is Float32Regs for each.
defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2719
// Base instruction record for the tex.a2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x and $y (both intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $y is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_2D_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
2728
// Emits four TEX_2D_ARRAY_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2740
// tex.a2d instantiations. Each result type (f32, s32, u32) is paired with
// both an f32 (Float32Regs) and an s32 (Int32Regs) intype.
defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2753
// Base instruction record for the tex.level.a2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x, $y and $lod (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $y is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_2D_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}],"
             " $lod;",
      []>;
2763
// Emits four TEX_2D_ARRAY_LEVEL_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2775
// tex.level.a2d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2782
// Base instruction record for the tex.grad.a2d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x, $y, $gradx0, $gradx1, $grady0 and $grady1 (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $y is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_2D_ARRAY_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}],"
             " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
2794
// Emits four TEX_2D_ARRAY_GRAD_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2806
// tex.grad.a2d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
2813
// Base instruction record for the tex.3d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y and $z (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $z is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_3D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
2822
// Emits four TEX_3D_base defs. The two suffix letters give the operand form
// of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2833
// tex.3d instantiations. Each result type (f32, s32, u32) is paired with
// both an f32 (Float32Regs) and an s32 (Int32Regs) intype.
defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2840
// Base instruction record for the tex.level.3d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y, $z and $lod (all
//         intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $z is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_3D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}],"
             " $lod;",
      []>;
2850
// Emits four TEX_3D_LEVEL_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2862
// tex.level.3d instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2869
// Base instruction record for the tex.grad.3d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y, $z, $gradx0..2 and
//         $grady0..2 (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice in their
// lists; presumably this pads each list to four elements -- confirm against
// the PTX ISA.
class TEX_3D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}],"
             " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
             " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
2883
// Emits four TEX_3D_GRAD_base defs. The two suffix letters give the operand
// form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_3D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2895
// tex.grad.3d instantiations: f32 results in Float32Regs, s32/u32 results in
// Int32Regs; intype is Float32Regs for each.
defm TEX_3D_F32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
2902
// Base instruction record for the tex.cube variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y and $z (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $z is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_CUBE_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
2911
// Emits four TEX_CUBE_base defs. The two suffix letters give the operand form
// of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2922
// tex.cube instantiations: f32 results in Float32Regs, s32/u32 results in
// Int32Regs; intype is Float32Regs for each.
defm TEX_CUBE_F32_F32 :
  TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 :
  TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 :
  TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
2929
// Base instruction record for the tex.level.cube variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $x, $y, $z and $lod (all
//         intype).
// The asm string refers to $t and $s, which texsamp has to provide.
// NOTE(review): $z is printed twice in the coordinate list; presumably this
// pads the list to four elements -- confirm against the PTX ISA.
class TEX_CUBE_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}],"
             " $lod;",
      []>;
2939
// Emits four TEX_CUBE_LEVEL_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2951
// tex.level.cube instantiations: f32 results in Float32Regs, s32/u32 results
// in Int32Regs; intype is Float32Regs for each.
defm TEX_CUBE_F32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
2958
// Base instruction record for the tex.acube variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x, $y and $z (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_CUBE_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
2968
// Emits four TEX_CUBE_ARRAY_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
2980
// tex.acube instantiations: f32 results in Float32Regs, s32/u32 results in
// Int32Regs; intype is Float32Regs for each.
defm TEX_CUBE_ARRAY_F32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
2987
// Base instruction record for the tex.level.acube variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in texsamp, then $l (always Int32Regs),
//         $x, $y, $z and $lod (all intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TEX_CUBE_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}],"
             " $lod;",
      []>;
2997
// Emits four TEX_CUBE_ARRAY_LEVEL_base defs. The two suffix letters give the
// operand form of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3009
// tex.level.acube instantiations: f32 results in Float32Regs, s32/u32
// results in Int32Regs; intype is Float32Regs for each.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3019
// Base instruction record for the tld4.{r,g,b,a}.2d variants.
//   outs: four outtype registers ($v0, $v1, $v2, $v3).
//   ins:  the operands supplied in texsamp, then $x and $y (both intype).
// The asm string refers to $t and $s, which texsamp has to provide.
class TLD4_2D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\},"
             " [$t, $s, \\{$x, $y\\}];",
      []>;
3027
// Emits four TLD4_2D_base defs. The two suffix letters give the operand form
// of $t and then $s: R = Int64Regs, I = i64imm.
multiclass TLD4_2D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3038
// tld4 2d instantiations, grouped by result type. Each group has one defm
// per .r/.g/.b/.a mnemonic component; intype is Float32Regs throughout.

// f32 results (Float32Regs).
defm TLD4_R_2D_F32_F32 :
  TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32 :
  TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32 :
  TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32 :
  TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results (Int32Regs).
defm TLD4_R_2D_S32_F32 :
  TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32 :
  TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32 :
  TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32 :
  TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results (Int32Regs).
defm TLD4_R_2D_U32_F32 :
  TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32 :
  TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32 :
  TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32 :
  TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3065
3066}
3067
3068
3069// texmode_unified
3070let IsTex = true, IsTexModeUnified = true in {
3071// Texture fetch instructions using handles
3072
// Base instruction record for the unified-mode tex.1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in tex, then $x (intype).
// The asm string refers to $t only (there is no $s here), so tex has to
// provide $t.
class TEX_UNIFIED_1D_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x\\}];",
      []>;
3080
// Emits two TEX_UNIFIED_1D_base defs, differing only in the form of $t:
// _R takes it in Int64Regs, _I as an i64imm.
multiclass TEX_UNIFIED_1D<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3086
// Unified-mode tex.1d instantiations. Each result type (f32, s32, u32) is
// paired with both an s32 (Int32Regs) and an f32 (Float32Regs) intype.
defm TEX_UNIFIED_1D_F32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3099
// Base instruction record for the unified-mode tex.level.1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in tex, then $x and $lod (both intype).
// The asm string refers to $t only (there is no $s here), so tex has to
// provide $t.
class TEX_UNIFIED_1D_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$x\\}], $lod;",
      []>;
3107
// Emits two TEX_UNIFIED_1D_LEVEL_base defs, differing only in the form of
// $t: _R takes it in Int64Regs, _I as an i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3113
// Unified-mode tex.level.1d instantiations: f32 results in Float32Regs,
// s32/u32 results in Int32Regs; intype is Float32Regs for each.
defm TEX_UNIFIED_1D_F32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3120
// Base instruction record for the unified-mode tex.grad.1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in tex, then $x, $gradx and $grady (all
//         intype).
// The asm string refers to $t only (there is no $s here), so tex has to
// provide $t.
class TEX_UNIFIED_1D_GRAD_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}],"
             " \\{$gradx\\}, \\{$grady\\};",
      []>;
3129
// Emits two TEX_UNIFIED_1D_GRAD_base defs, differing only in the form of $t:
// _R takes it in Int64Regs, _I as an i64imm.
multiclass TEX_UNIFIED_1D_GRAD<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3135
// Unified-mode tex.grad.1d instantiations: f32 results in Float32Regs,
// s32/u32 results in Int32Regs; intype is Float32Regs for each.
defm TEX_UNIFIED_1D_F32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD :
  TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3142
// Base instruction record for the unified-mode tex.a1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in tex, then $l (always Int32Regs) and $x
//         (intype).
// The asm string refers to $t only (there is no $s here), so tex has to
// provide $t.
class TEX_UNIFIED_1D_ARRAY_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}];",
      []>;
3150
// Emits two TEX_UNIFIED_1D_ARRAY_base defs, differing only in the form of
// $t: _R takes it in Int64Regs, _I as an i64imm.
multiclass TEX_UNIFIED_1D_ARRAY<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3156
// Unified-mode tex.a1d instantiations. Each result type (f32, s32, u32) is
// paired with both an s32 (Int32Regs) and an f32 (Float32Regs) intype.
defm TEX_UNIFIED_1D_ARRAY_F32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32 :
  TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3169
// Base instruction record for the unified-mode tex.level.a1d variants.
//   outs: four outtype registers ($r, $g, $b, $a).
//   ins:  the operands supplied in tex, then $l (always Int32Regs), $x and
//         $lod (both intype).
// The asm string refers to $t only (there is no $s here), so tex has to
// provide $t.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype, dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
             " [$t, \\{$l, $x\\}], $lod;",
      []>;
3177
// Emits two TEX_UNIFIED_1D_ARRAY_LEVEL_base defs, differing only in the form
// of $t: _R takes it in Int64Regs, _I as an i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<
    string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t)>;
}
3185
3186defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3187  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3188                               Float32Regs, Float32Regs>;
3189defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3190  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3191                               Int32Regs, Float32Regs>;
3192defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3193  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3194                               Int32Regs, Float32Regs>;
3195
// Instruction shape for the "tex.grad.a1d.*" mnemonics instantiated below.
// Inputs after the handle $t: $l (Int32Regs), coordinate $x, and the two
// single-element operands $gradx and $grady (all three of class `intype`).
// Note: the emitted text has two spaces before '['; that spacing is part of
// the asm string and is kept exactly.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex,
               (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
          inst # " \t\\{$r, $g, $b, $a\\},  [$t, \\{$l, $x\\}],"
                 " \\{$gradx\\}, \\{$grady\\};",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
    : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs,
                                Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
    : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs,
                                Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
    : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs,
                                Float32Regs>;
3223
// Instruction shape for the "tex.2d.*" mnemonics instantiated below.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of both coordinates $x and $y.
//   tex     - (ins ...) dag providing the handle operand $t.
// The pattern list is empty.
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$x, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Name suffix is <result type>_<coordinate type>.
defm TEX_UNIFIED_2D_F32_S32
    : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32
    : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32
    : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32
    : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32
    : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32
    : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3250
// Instruction shape for the "tex.level.2d.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y and a trailing $lod
// operand, all of class `intype`.  Four `outtype` results; empty pattern
// list.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$x, $y\\}], $lod;",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_2D_F32_F32_LEVEL
    : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs,
                           Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
    : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs,
                           Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
    : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs,
                           Float32Regs>;
3271
// Instruction shape for the "tex.grad.2d.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y followed by two
// two-element operand groups, {$gradx0, $gradx1} and {$grady0, $grady1};
// every one of them is of class `intype`.  Empty pattern list.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y,
                         intype:$gradx0, intype:$gradx1,
                         intype:$grady0, intype:$grady1)),
          inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
                 " \\{$gradx0, $gradx1\\},"
                 " \\{$grady0, $grady1\\};",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                    (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
                                    (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_2D_F32_F32_GRAD
    : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs,
                          Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
    : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs,
                          Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
    : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs,
                          Float32Regs>;
3294
// Instruction shape for the "tex.a2d.*" mnemonics instantiated below.
// Inputs after the handle $t: $l (always Int32Regs) and coordinates $x, $y
// of class `intype`.  The asm string prints $y twice, which makes the
// bracketed coordinate list four elements long (presumably padding required
// by the PTX operand form -- confirm against the PTX ISA).
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$l, $x, $y, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

// Name suffix is <result type>_<coordinate type>.
defm TEX_UNIFIED_2D_ARRAY_F32_S32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
    : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3320
// Instruction shape for the "tex.level.a2d.*" mnemonics instantiated below.
// Inputs after the handle $t: $l (Int32Regs), coordinates $x, $y and a
// trailing $lod operand (the last three of class `intype`).  $y is printed
// twice so the bracketed list has four elements.  The emitted text has two
// spaces before '['; that spacing is part of the asm string and is kept
// exactly.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex,
               (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},  [$t, \\{$l, $x, $y, $y\\}],"
                 " $lod;",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
    : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs,
                                 Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
    : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs,
                                 Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
    : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs,
                                 Float32Regs>;
3347
// Instruction shape for the "tex.grad.a2d.*" mnemonics instantiated below.
// Inputs after the handle $t: $l (Int32Regs), coordinates $x, $y, then the
// operand groups {$gradx0, $gradx1} and {$grady0, $grady1}; everything but
// $l is of class `intype`.  $y is printed twice so the bracketed coordinate
// list has four elements.  Empty pattern list.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                         intype:$gradx0, intype:$gradx1,
                         intype:$grady0, intype:$grady1)),
          inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
                 " \\{$gradx0, $gradx1\\},"
                 " \\{$grady0, $grady1\\};",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
    : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs,
                                Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
    : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs,
                                Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
    : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs,
                                Float32Regs>;
3375
// Instruction shape for the "tex.3d.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y, $z of class `intype`.
// The asm string prints $z twice, which makes the bracketed coordinate list
// four elements long (presumably padding required by the PTX operand form
// -- confirm against the PTX ISA).  Empty pattern list.
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$x, $y, $z, $z\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Name suffix is <result type>_<coordinate type>.
defm TEX_UNIFIED_3D_F32_S32
    : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32
    : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32
    : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32
    : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32
    : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32
    : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3401
// Instruction shape for the "tex.level.3d.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y, $z and a trailing $lod
// operand, all of class `intype`.  $z is printed twice so the bracketed
// coordinate list has four elements.  Empty pattern list.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}], $lod;",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_3D_F32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs,
                           Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs,
                           Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs,
                           Float32Regs>;
3422
// Instruction shape for the "tex.grad.3d.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y, $z, then $gradx0..$gradx2
// and $grady0..$grady2, all of class `intype`.  In the asm string the last
// element of each three-element group ($z, $gradx2, $grady2) is printed
// twice, so every bracketed list has four elements.  Empty pattern list.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z,
                         intype:$gradx0, intype:$gradx1, intype:$gradx2,
                         intype:$grady0, intype:$grady1, intype:$grady2)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$x, $y, $z, $z\\}],"
                 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                 " \\{$grady0, $grady1, $grady2, $grady2\\};",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                    (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                    (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_3D_F32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs,
                          Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs,
                          Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs,
                          Float32Regs>;
3447
// Instruction shape for the "tex.cube.*" mnemonics instantiated below.
// Inputs after the handle $t: coordinates $x, $y, $z of class `intype`.
// $z is printed twice so the bracketed coordinate list has four elements.
// Empty pattern list.
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$x, $y, $z, $z\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_CUBE_F32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3467
// Instruction shape for the "tex.level.cube.*" mnemonics instantiated
// below.  Inputs after the handle $t: coordinates $x, $y, $z and a trailing
// $lod operand, all of class `intype`.  $z is printed twice so the
// bracketed coordinate list has four elements.  Empty pattern list.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}], $lod;",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs,
                             Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs,
                             Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs,
                             Float32Regs>;
3493
// Instruction shape for the "tex.acube.*" mnemonics instantiated below.
// Inputs after the handle $t: $l (always Int32Regs) and coordinates
// $x, $y, $z of class `intype`.  The bracketed list {$l, $x, $y, $z}
// already has four elements, so no operand is repeated here.
// Empty pattern list.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex,
               (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
                 " [$t, \\{$l, $x, $y, $z\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                    (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                    (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs,
                             Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs,
                             Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs,
                             Float32Regs>;
3515
// Instruction shape for the "tex.level.acube.*" mnemonics instantiated
// below.  Inputs after the handle $t: $l (always Int32Regs), coordinates
// $x, $y, $z and a trailing $lod operand (the last four of class `intype`).
// Empty pattern list.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(tex, (ins Int32Regs:$l,
                         intype:$x, intype:$y, intype:$z,
                         intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}], $lod;",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  def _R
      : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I
      : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for this form.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", Float32Regs,
                                   Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", Int32Regs,
                                   Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", Int32Regs,
                                   Float32Regs>;
3542
// Instruction shape for the "tld4.<c>.2d.*" mnemonics instantiated below.
//   inst    - mnemonic text placed at the front of the asm string.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of both coordinates $x and $y.
//   tex     - (ins ...) dag providing the handle operand $t.
// The pattern list is empty.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
    : NVPTXInst<
          (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
          !con(tex, (ins intype:$x, intype:$y)),
          inst # " \t\\{$v0, $v1, $v2, $v3\\},"
                 " [$t, \\{$x, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R
      : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
      : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// One defm per component selector (.r/.g/.b/.a) and result type; the
// coordinates are always f32.

// f32 results.
defm TLD4_UNIFIED_R_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// s32 results.
defm TLD4_UNIFIED_R_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// u32 results.
defm TLD4_UNIFIED_R_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3582
3583}
3584
3585
3586
3587//=== Surface load instructions
3588
// Single-result surface loads ("suld.b.<geom>.b<N>.<mode>").
//
// NOTE(review): IsSuld is a numeric field, not a boolean -- the following
// group in this file sets it to 2 for the two-result (.v2) loads.  The value
// is therefore written as the integer 1 here; TableGen treats `true` as the
// integer 1, so the generated records are unchanged.  The exact meaning of
// the encoding is defined where the field is declared (not in this section).
let IsSuld = 1 in {

// 1D surface load: one result $r, coordinate $x (Int32Regs).  `surf` is the
// (ins ...) dag that supplies the surface handle $s.  Empty pattern list.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x)),
          inst # " \\{$r\\}, [$s, \\{$x\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (the b8 forms use Int16Regs for the result).
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// 1D array surface load: one result $r; inputs $l and $x (both Int32Regs)
// follow the handle $s.  Empty pattern list.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants.
defm SULD_1D_ARRAY_I8_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_I8_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_I8_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// 2D surface load: one result $r; coordinates $x, $y (Int32Regs) follow the
// handle $s.  Empty pattern list.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants.
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// 2D array surface load: one result $r; inputs $l, $x, $y (Int32Regs)
// follow the handle $s.  $y is printed twice so the bracketed list has four
// elements.  Empty pattern list.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
          inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants.
defm SULD_2D_ARRAY_I8_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_I8_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_I8_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// 3D surface load: one result $r; coordinates $x, $y, $z (Int32Regs) follow
// the handle $s.  $z is printed twice so the bracketed list has four
// elements.  Empty pattern list.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
          inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants.
defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
3728
3729let IsSuld = 2 in {
3730
// Two-result 1D surface load ("suld.b.1d.v2.*"): results $r and $g of class
// `outtype`; coordinate $x (Int32Regs) follows the handle $s supplied by
// `surf`.  Empty pattern list.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x)),
          inst # " \\{$r, $g\\},"
                 " [$s, \\{$x\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R
      : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
      : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (the b8 forms use Int16Regs for the results).
defm SULD_1D_V2I8_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_V2I8_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_V2I8_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
3755
// Two-result 1D array surface load ("suld.b.a1d.v2.*"): results $r and $g
// of class `outtype`; inputs $l and $x (both Int32Regs) follow the handle
// $s supplied by `surf`.  Empty pattern list.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          inst # " \\{$r, $g\\},"
                 " [$s, \\{$l, $x\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R
      : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
      : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (the b8 forms use Int16Regs for the results).
defm SULD_1D_ARRAY_V2I8_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_V2I8_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_V2I8_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
3792
// Two-result 2D surface load ("suld.b.2d.v2.*"): results $r and $g of class
// `outtype`; coordinates $x and $y (both Int32Regs) follow the handle $s
// supplied by `surf`.  Empty pattern list.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          inst # " \\{$r, $g\\},"
                 " [$s, \\{$x, $y\\}];",
          []>;

// NAME_R: handle in an Int64Regs register.  NAME_I: handle as i64imm.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R
      : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I
      : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants (the b8 forms use Int16Regs for the results).
defm SULD_2D_V2I8_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_V2I8_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_V2I8_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
3829
// Two-result suld instruction addressed by $l plus two 32-bit coordinates.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of both results ($r, $g)
//   surf    - `ins` dag supplying the surface operand $s; $l, $x, $y are
//             appended to it with !con
// Printed as `<inst> {$r, $g}, [$s, {$l, $x, $y, $y}];`.
// NOTE(review): $y is printed twice on purpose, giving a four-element
// coordinate list from three operands; presumably the fourth slot is padding
// required by the PTX a2d addressing form - confirm against the PTX ISA.
// The pattern list is empty, so no selection pattern is attached here.
3830class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3831    : NVPTXInst<(outs outtype:$r, outtype:$g),
3832                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3833                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
3834                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3835multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
3836  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3837  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
3838}
3839
// suld.b.a2d.v2 instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
3840defm SULD_2D_ARRAY_V2I8_CLAMP
3841  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
3842defm SULD_2D_ARRAY_V2I16_CLAMP
3843  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
3844defm SULD_2D_ARRAY_V2I32_CLAMP
3845  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
3846defm SULD_2D_ARRAY_V2I64_CLAMP
3847  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
3848
3849defm SULD_2D_ARRAY_V2I8_TRAP
3850  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
3851defm SULD_2D_ARRAY_V2I16_TRAP
3852  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
3853defm SULD_2D_ARRAY_V2I32_TRAP
3854  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
3855defm SULD_2D_ARRAY_V2I64_TRAP
3856  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
3857
3858defm SULD_2D_ARRAY_V2I8_ZERO
3859  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
3860defm SULD_2D_ARRAY_V2I16_ZERO
3861  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
3862defm SULD_2D_ARRAY_V2I32_ZERO
3863  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
3864defm SULD_2D_ARRAY_V2I64_ZERO
3865  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
3866
// Two-result suld instruction addressed by three 32-bit coordinates.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of both results ($r, $g)
//   surf    - `ins` dag supplying the surface operand $s; $x, $y, $z are
//             appended to it with !con
// Printed as `<inst> {$r, $g}, [$s, {$x, $y, $z, $z}];`.
// NOTE(review): $z is printed twice on purpose, giving a four-element
// coordinate list from three operands; presumably the fourth slot is padding
// required by the PTX 3d addressing form - confirm against the PTX ISA.
// The pattern list is empty, so no selection pattern is attached here.
3867class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
3868    : NVPTXInst<(outs outtype:$r, outtype:$g),
3869                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
3870                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
3871                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3872multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
3873  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
3874  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
3875}
3876
// suld.b.3d.v2 instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
3877defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
3878defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
3879defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
3880defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
3881
3882defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
3883defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
3884defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
3885defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
3886
3887defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
3888defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
3889defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
3890defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
3891
3892}
3893
3894let IsSuld = 3 in {
3895
// Four-result suld instruction addressed by one 32-bit coordinate.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of all four results ($r, $g, $b, $a)
//   surf    - `ins` dag supplying the surface operand $s; the coordinate $x
//             is appended to it with !con
// Printed as `<inst> {$r, $g, $b, $a}, [$s, {$x}];`. The pattern list is
// empty, so no selection pattern is attached to the record here.
3896class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3897    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3898                !con(surf, (ins Int32Regs:$x)),
3899                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
3900                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3901multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
3902  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3903  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
3904}
3905
// suld.b.1d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
3906defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
3907defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
3908defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
3909
3910defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
3911defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
3912defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
3913
3914defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
3915defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
3916defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
3917
// Four-result suld instruction addressed by $l plus one 32-bit coordinate.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of all four results ($r, $g, $b, $a)
//   surf    - `ins` dag supplying the surface operand $s; $l and $x are
//             appended to it with !con
// Printed as `<inst> {$r, $g, $b, $a}, [$s, {$l, $x}];`. The pattern list is
// empty, so no selection pattern is attached to the record here.
3918class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3919    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3920                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3921                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
3922                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3923multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
3924  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3925  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
3926}
3927
// suld.b.a1d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
3928defm SULD_1D_ARRAY_V4I8_CLAMP
3929  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
3930defm SULD_1D_ARRAY_V4I16_CLAMP
3931  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
3932defm SULD_1D_ARRAY_V4I32_CLAMP
3933  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
3934
3935defm SULD_1D_ARRAY_V4I8_TRAP
3936  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
3937defm SULD_1D_ARRAY_V4I16_TRAP
3938  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
3939defm SULD_1D_ARRAY_V4I32_TRAP
3940  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
3941
3942defm SULD_1D_ARRAY_V4I8_ZERO
3943  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
3944defm SULD_1D_ARRAY_V4I16_ZERO
3945  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
3946defm SULD_1D_ARRAY_V4I32_ZERO
3947  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
3948
// Four-result suld instruction addressed by two 32-bit coordinates.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of all four results ($r, $g, $b, $a)
//   surf    - `ins` dag supplying the surface operand $s; $x and $y are
//             appended to it with !con
// Printed as `<inst> {$r, $g, $b, $a}, [$s, {$x, $y}];`. The pattern list is
// empty, so no selection pattern is attached to the record here.
3949class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3950    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3951                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
3952                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
3953                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3954multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
3955  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3956  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
3957}
3958
// suld.b.2d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
3959defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
3960defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
3961defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
3962
3963defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
3964defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
3965defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
3966
3967defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
3968defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
3969defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
3970
// Four-result suld instruction addressed by $l plus two 32-bit coordinates.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of all four results ($r, $g, $b, $a)
//   surf    - `ins` dag supplying the surface operand $s; $l, $x, $y are
//             appended to it with !con
// Printed as `<inst> {$r, $g, $b, $a}, [$s, {$l, $x, $y, $y}];`.
// NOTE(review): $y is printed twice on purpose, giving a four-element
// coordinate list from three operands; presumably the fourth slot is padding
// required by the PTX a2d addressing form - confirm against the PTX ISA.
// The pattern list is empty, so no selection pattern is attached here.
3971class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
3972    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
3973                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
3974                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
3975                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
3976multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
3977  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
3978  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
3979}
3980
// suld.b.a2d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
3981defm SULD_2D_ARRAY_V4I8_CLAMP
3982  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
3983defm SULD_2D_ARRAY_V4I16_CLAMP
3984  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
3985defm SULD_2D_ARRAY_V4I32_CLAMP
3986  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
3987
3988defm SULD_2D_ARRAY_V4I8_TRAP
3989  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
3990defm SULD_2D_ARRAY_V4I16_TRAP
3991  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
3992defm SULD_2D_ARRAY_V4I32_TRAP
3993  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
3994
3995defm SULD_2D_ARRAY_V4I8_ZERO
3996  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
3997defm SULD_2D_ARRAY_V4I16_ZERO
3998  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
3999defm SULD_2D_ARRAY_V4I32_ZERO
4000  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
4001
// Four-result suld instruction addressed by three 32-bit coordinates.
//   inst    - PTX mnemonic, prepended to the operand string
//   outtype - register class of all four results ($r, $g, $b, $a)
//   surf    - `ins` dag supplying the surface operand $s; $x, $y, $z are
//             appended to it with !con
// Printed as `<inst> {$r, $g, $b, $a}, [$s, {$x, $y, $z, $z}];`.
// NOTE(review): $z is printed twice on purpose, giving a four-element
// coordinate list from three operands; presumably the fourth slot is padding
// required by the PTX 3d addressing form - confirm against the PTX ISA.
// The pattern list is empty, so no selection pattern is attached here.
4002class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4003    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4004                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4005                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
4006                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4007multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4008  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4009  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
4010}
4011
// suld.b.3d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
4012defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4013defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4014defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
4015
4016defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4017defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4018defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
4019
4020defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4021defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4022defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
4023
4024}
4025
4026//-----------------------------------
4027// Texture Query Intrinsics
4028//-----------------------------------
4029
// txq.* query instructions. Every record inside the braces gets
// IsSurfTexQuery = true from the enclosing `let`.
// Each query is defined twice with an identical assembly string:
//   *_R - operand $a is an Int64Regs register
//   *_I - operand $a is an i64imm immediate
// All of them write a single Int32Regs result $d and carry an empty pattern
// list, so no selection pattern is attached to the records themselves.
4030let IsSurfTexQuery = true in {
// txq.channel_order
4031def TXQ_CHANNEL_ORDER_R
4032  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4033              "txq.channel_order.b32 \t$d, [$a];",
4034              []>;
4035def TXQ_CHANNEL_ORDER_I
4036  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4037              "txq.channel_order.b32 \t$d, [$a];",
4038              []>;
// txq.channel_data_type
4039def TXQ_CHANNEL_DATA_TYPE_R
4040  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4041              "txq.channel_data_type.b32 \t$d, [$a];",
4042              []>;
4043def TXQ_CHANNEL_DATA_TYPE_I
4044  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4045              "txq.channel_data_type.b32 \t$d, [$a];",
4046              []>;
// txq.width
4047def TXQ_WIDTH_R
4048  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4049              "txq.width.b32 \t$d, [$a];",
4050              []>;
4051def TXQ_WIDTH_I
4052  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4053              "txq.width.b32 \t$d, [$a];",
4054              []>;
// txq.height
4055def TXQ_HEIGHT_R
4056  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4057              "txq.height.b32 \t$d, [$a];",
4058              []>;
4059def TXQ_HEIGHT_I
4060  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4061              "txq.height.b32 \t$d, [$a];",
4062              []>;
// txq.depth
4063def TXQ_DEPTH_R
4064  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4065              "txq.depth.b32 \t$d, [$a];",
4066              []>;
4067def TXQ_DEPTH_I
4068  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4069              "txq.depth.b32 \t$d, [$a];",
4070              []>;
// txq.array_size
4071def TXQ_ARRAY_SIZE_R
4072  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4073              "txq.array_size.b32 \t$d, [$a];",
4074              []>;
4075def TXQ_ARRAY_SIZE_I
4076  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4077              "txq.array_size.b32 \t$d, [$a];",
4078              []>;
// txq.num_samples
4079def TXQ_NUM_SAMPLES_R
4080  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4081              "txq.num_samples.b32 \t$d, [$a];",
4082              []>;
4083def TXQ_NUM_SAMPLES_I
4084  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4085              "txq.num_samples.b32 \t$d, [$a];",
4086              []>;
// txq.num_mipmap_levels
4087def TXQ_NUM_MIPMAP_LEVELS_R
4088  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4089              "txq.num_mipmap_levels.b32 \t$d, [$a];",
4090              []>;
4091def TXQ_NUM_MIPMAP_LEVELS_I
4092  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4093              "txq.num_mipmap_levels.b32 \t$d, [$a];",
4094              []>;
4095}
4096
// Selection patterns for the texture-query intrinsics: each int_nvvm_txq_*
// intrinsic applied to an Int64Regs operand is selected to the register-
// operand (_R) form of the corresponding TXQ instruction. None of these
// patterns targets an _I form.
4097def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4098          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4099def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4100          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4101def : Pat<(int_nvvm_txq_width Int64Regs:$a),
4102          (TXQ_WIDTH_R Int64Regs:$a)>;
4103def : Pat<(int_nvvm_txq_height Int64Regs:$a),
4104          (TXQ_HEIGHT_R Int64Regs:$a)>;
4105def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4106          (TXQ_DEPTH_R Int64Regs:$a)>;
4107def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4108          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
4109def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4110          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
4111def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4112          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
4113
4114
4115//-----------------------------------
4116// Surface Query Intrinsics
4117//-----------------------------------
4118
// suq.* query instructions. Every record inside the braces gets
// IsSurfTexQuery = true from the enclosing `let`.
// Each query is defined twice with an identical assembly string:
//   *_R - operand $a is an Int64Regs register
//   *_I - operand $a is an i64imm immediate
// All of them write a single Int32Regs result $d and carry an empty pattern
// list, so no selection pattern is attached to the records themselves.
4119let IsSurfTexQuery = true in {
// suq.channel_order
4120def SUQ_CHANNEL_ORDER_R
4121  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4122              "suq.channel_order.b32 \t$d, [$a];",
4123              []>;
4124def SUQ_CHANNEL_ORDER_I
4125  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4126              "suq.channel_order.b32 \t$d, [$a];",
4127              []>;
// suq.channel_data_type
4128def SUQ_CHANNEL_DATA_TYPE_R
4129  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4130              "suq.channel_data_type.b32 \t$d, [$a];",
4131              []>;
4132def SUQ_CHANNEL_DATA_TYPE_I
4133  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4134              "suq.channel_data_type.b32 \t$d, [$a];",
4135              []>;
// suq.width
4136def SUQ_WIDTH_R
4137  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4138              "suq.width.b32 \t$d, [$a];",
4139              []>;
4140def SUQ_WIDTH_I
4141  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4142              "suq.width.b32 \t$d, [$a];",
4143              []>;
// suq.height
4144def SUQ_HEIGHT_R
4145  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4146              "suq.height.b32 \t$d, [$a];",
4147              []>;
4148def SUQ_HEIGHT_I
4149  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4150              "suq.height.b32 \t$d, [$a];",
4151              []>;
// suq.depth
4152def SUQ_DEPTH_R
4153  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4154              "suq.depth.b32 \t$d, [$a];",
4155              []>;
4156def SUQ_DEPTH_I
4157  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4158              "suq.depth.b32 \t$d, [$a];",
4159              []>;
// suq.array_size
4160def SUQ_ARRAY_SIZE_R
4161  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
4162              "suq.array_size.b32 \t$d, [$a];",
4163              []>;
4164def SUQ_ARRAY_SIZE_I
4165  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4166              "suq.array_size.b32 \t$d, [$a];",
4167              []>;
4168}
4169
// Selection patterns for the surface-query intrinsics: each int_nvvm_suq_*
// intrinsic applied to an Int64Regs operand is selected to the register-
// operand (_R) form of the corresponding SUQ instruction. None of these
// patterns targets an _I form.
4170def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4171          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
4172def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4173          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
4174def : Pat<(int_nvvm_suq_width Int64Regs:$a),
4175          (SUQ_WIDTH_R Int64Regs:$a)>;
4176def : Pat<(int_nvvm_suq_height Int64Regs:$a),
4177          (SUQ_HEIGHT_R Int64Regs:$a)>;
4178def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4179          (SUQ_DEPTH_R Int64Regs:$a)>;
4180def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4181          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
4182
4183
4184//===- Handle Query -------------------------------------------------------===//
4185
4186// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// istypep.* instructions: each takes an Int64Regs operand $a and writes an
// Int1Regs result $d. Unlike the query instructions above, the selection
// pattern is attached inline: the matching int_nvvm_istypep_* intrinsic on
// $a is selected directly to the instruction. Only a register-operand form
// is defined for each.
// istypep.samplerref <- int_nvvm_istypep_sampler
4187def ISTYPEP_SAMPLER
4188  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4189              "istypep.samplerref \t$d, $a;",
4190              [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
// istypep.surfref <- int_nvvm_istypep_surface
4191def ISTYPEP_SURFACE
4192  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4193              "istypep.surfref \t$d, $a;",
4194              [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
// istypep.texref <- int_nvvm_istypep_texture
4195def ISTYPEP_TEXTURE
4196  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
4197              "istypep.texref \t$d, $a;",
4198              [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
4199
4200//===- Surface Stores -----------------------------------------------------===//
4201
4202let IsSust = true in {
4203
// Single-value sust instruction addressed by one 32-bit coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operand $r
//   surf   - `ins` dag supplying the surface operand $s; $x and $r are
//            appended to it with !con
// The instruction has no outputs. Printed as `<inst> \t[$s, {$x}], {$r};`.
// The pattern list is empty, so no selection pattern is attached here.
4204class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4205    : NVPTXInst<(outs),
4206                !con(surf, (ins Int32Regs:$x, intype:$r)),
4207                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4208                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4209multiclass SUST_1D<string inst, NVPTXRegClass intype> {
4210  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4211  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
4212}
4213
// sust.b.1d instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4214defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4215defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4216defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4217defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
4218
4219defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4220defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4221defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4222defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
4223
4224defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4225defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4226defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4227defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
4228
// sust.p.1d: only the .trap suffix with b8/b16/b32 is instantiated.
4229defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4230defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4231defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
4232
// Two-value sust instruction addressed by one 32-bit coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operands $r, $g
//   surf   - `ins` dag supplying the surface operand $s; $x, $r, $g are
//            appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$x}], {$r, $g};`. The pattern list is empty, so no
// selection pattern is attached here.
4233class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4234    : NVPTXInst<(outs),
4235                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4236                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
4237                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4238multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4239  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4240  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
4241}
4242
// sust.b.1d.v2 instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4243defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4244defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4245defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4246defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
4247
4248defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4249defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4250defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4251defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
4252
4253defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4254defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4255defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4256defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
4257
// sust.p.1d.v2: only the .trap suffix with b8/b16/b32 is instantiated.
4258defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4259defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4260defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
4261
// Four-value sust instruction addressed by one 32-bit coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operands $r, $g, $b, $a
//   surf   - `ins` dag supplying the surface operand $s; $x and the four
//            value operands are appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$x}], {$r, $g, $b, $a};`. The pattern list is empty, so no
// selection pattern is attached here.
4262class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4263    : NVPTXInst<(outs),
4264                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4265                                intype:$b, intype:$a)),
4266                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4267                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4268multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4269  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4270  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
4271}
4272
// sust.b.1d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
4273defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4274defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4275defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
4276
4277defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4278defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4279defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
4280
4281defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4282defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4283defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
4284
// sust.p.1d.v4: only the .trap suffix with b8/b16/b32 is instantiated.
4285defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4286defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4287defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
4288
// Single-value sust instruction addressed by $idx plus one 32-bit
// coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operand $r
//   surf   - `ins` dag supplying the surface operand $s; $idx, $x and $r are
//            appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$idx, $x}], {$r};`. The pattern list is empty, so no
// selection pattern is attached here.
4289class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4290    : NVPTXInst<(outs),
4291                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4292                inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
4293                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4294multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4295  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4296  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4297}
4298
// sust.b.a1d instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4299defm SUST_B_1D_ARRAY_B8_CLAMP
4300  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4301defm SUST_B_1D_ARRAY_B16_CLAMP
4302  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4303defm SUST_B_1D_ARRAY_B32_CLAMP
4304  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4305defm SUST_B_1D_ARRAY_B64_CLAMP
4306  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
4307
4308defm SUST_B_1D_ARRAY_B8_TRAP
4309  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4310defm SUST_B_1D_ARRAY_B16_TRAP
4311  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4312defm SUST_B_1D_ARRAY_B32_TRAP
4313  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4314defm SUST_B_1D_ARRAY_B64_TRAP
4315  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
4316
4317defm SUST_B_1D_ARRAY_B8_ZERO
4318  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4319defm SUST_B_1D_ARRAY_B16_ZERO
4320  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4321defm SUST_B_1D_ARRAY_B32_ZERO
4322  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4323defm SUST_B_1D_ARRAY_B64_ZERO
4324  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
4325
// sust.p.a1d: only the .trap suffix with b8/b16/b32 is instantiated.
4326defm SUST_P_1D_ARRAY_B8_TRAP
4327  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4328defm SUST_P_1D_ARRAY_B16_TRAP
4329  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4330defm SUST_P_1D_ARRAY_B32_TRAP
4331  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
4332
// Two-value sust instruction addressed by $idx plus one 32-bit coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operands $r, $g
//   surf   - `ins` dag supplying the surface operand $s; $idx, $x, $r, $g
//            are appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$idx, $x}], {$r, $g};`. The pattern list is empty, so no
// selection pattern is attached here.
4333class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4334    : NVPTXInst<(outs),
4335                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4336                                intype:$r, intype:$g)),
4337                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4338                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4339multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4340  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4341  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4342}
4343
// sust.b.a1d.v2 instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4344defm SUST_B_1D_ARRAY_V2B8_CLAMP
4345  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4346defm SUST_B_1D_ARRAY_V2B16_CLAMP
4347  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4348defm SUST_B_1D_ARRAY_V2B32_CLAMP
4349  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4350defm SUST_B_1D_ARRAY_V2B64_CLAMP
4351  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
4352
4353defm SUST_B_1D_ARRAY_V2B8_TRAP
4354  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4355defm SUST_B_1D_ARRAY_V2B16_TRAP
4356  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4357defm SUST_B_1D_ARRAY_V2B32_TRAP
4358  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4359defm SUST_B_1D_ARRAY_V2B64_TRAP
4360  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
4361
4362defm SUST_B_1D_ARRAY_V2B8_ZERO
4363  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4364defm SUST_B_1D_ARRAY_V2B16_ZERO
4365  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4366defm SUST_B_1D_ARRAY_V2B32_ZERO
4367  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4368defm SUST_B_1D_ARRAY_V2B64_ZERO
4369  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
4370
// sust.p.a1d.v2: only the .trap suffix with b8/b16/b32 is instantiated.
4371defm SUST_P_1D_ARRAY_V2B8_TRAP
4372  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4373defm SUST_P_1D_ARRAY_V2B16_TRAP
4374  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4375defm SUST_P_1D_ARRAY_V2B32_TRAP
4376  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
4377
// Four-value sust instruction addressed by $idx plus one 32-bit coordinate.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operands $r, $g, $b, $a
//   surf   - `ins` dag supplying the surface operand $s; $idx, $x and the
//            four value operands are appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$idx, $x}], {$r, $g, $b, $a};`. The pattern list is empty,
// so no selection pattern is attached here.
4378class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4379    : NVPTXInst<(outs),
4380                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4381                                intype:$r, intype:$g, intype:$b, intype:$a)),
4382                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
4383                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4384multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4385  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4386  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4387}
4388
// sust.b.a1d.v4 instantiations for the .clamp / .trap / .zero suffixes.
// Only b8, b16 (both Int16Regs) and b32 (Int32Regs) are instantiated; there
// is no b64 form in this group.
4389defm SUST_B_1D_ARRAY_V4B8_CLAMP
4390  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4391defm SUST_B_1D_ARRAY_V4B16_CLAMP
4392  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4393defm SUST_B_1D_ARRAY_V4B32_CLAMP
4394  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
4395
4396defm SUST_B_1D_ARRAY_V4B8_TRAP
4397  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4398defm SUST_B_1D_ARRAY_V4B16_TRAP
4399  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4400defm SUST_B_1D_ARRAY_V4B32_TRAP
4401  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
4402
4403defm SUST_B_1D_ARRAY_V4B8_ZERO
4404  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4405defm SUST_B_1D_ARRAY_V4B16_ZERO
4406  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4407defm SUST_B_1D_ARRAY_V4B32_ZERO
4408  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
4409
// sust.p.a1d.v4: only the .trap suffix with b8/b16/b32 is instantiated.
4410defm SUST_P_1D_ARRAY_V4B8_TRAP
4411  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4412defm SUST_P_1D_ARRAY_V4B16_TRAP
4413  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4414defm SUST_P_1D_ARRAY_V4B32_TRAP
4415  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
4416
// Single-value sust instruction addressed by two 32-bit coordinates.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operand $r
//   surf   - `ins` dag supplying the surface operand $s; $x, $y and $r are
//            appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$x, $y}], {$r};`. The pattern list is empty, so no
// selection pattern is attached here.
4417class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4418    : NVPTXInst<(outs),
4419                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4420                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4421                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4422multiclass SUST_2D<string inst, NVPTXRegClass intype> {
4423  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4424  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
4425}
4426
// sust.b.2d instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4427defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4428defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4429defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4430defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
4431
4432defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4433defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4434defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4435defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
4436
4437defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4438defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4439defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4440defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
4441
// sust.p.2d: only the .trap suffix with b8/b16/b32 is instantiated.
4442defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4443defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4444defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
4445
// Two-value sust instruction addressed by two 32-bit coordinates.
//   inst   - PTX mnemonic, prepended to the operand string
//   intype - register class of the value operands $r, $g
//   surf   - `ins` dag supplying the surface operand $s; $x, $y, $r, $g are
//            appended to it with !con
// The instruction has no outputs. Printed as
// `<inst> \t[$s, {$x, $y}], {$r, $g};`. The pattern list is empty, so no
// selection pattern is attached here.
4446class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4447    : NVPTXInst<(outs),
4448                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4449                                intype:$r, intype:$g)),
4450                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
4451                []>;
// Emits <NAME>_R ($s in an Int64Regs register) and <NAME>_I ($s as an i64imm
// immediate).
4452multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4453  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4454  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
4455}
4456
// sust.b.2d.v2 instantiations for the .clamp / .trap / .zero suffixes.
// b8 and b16 both use Int16Regs; b32 uses Int32Regs; b64 uses Int64Regs.
4457defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4458defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4459defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4460defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
4461
4462defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4463defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4464defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4465defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
4466
4467defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4468defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4469defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4470defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
4471
// sust.p.2d.v2: only the .trap suffix with b8/b16/b32 is instantiated.
4472defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4473defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4474defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4475
// Base class for 2D surface stores of a four-element vector.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs coordinates $x, $y and four `intype` values
// $r, $g, $b, $a. The asm string is `inst` followed by
// "[$s, {$x, $y}], {$r, $g, $b, $a};", so `surf` must supply $s.
4476class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4477    : NVPTXInst<(outs),
4478                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4479                                intype:$r, intype:$g, intype:$b, intype:$a)),
4480                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
4481                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4482multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4483  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4484  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
4485}
4486
// 2D four-element-vector stores. Only b8/b16/b32 element widths are
// instantiated (no b64 v4 form appears here): sust.b in the .clamp, .trap and
// .zero modes, sust.p in .trap only. b8 and b16 both use Int16Regs.
4487defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4488defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4489defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4490
4491defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4492defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4493defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4494
4495defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4496defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4497defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4498
4499defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4500defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4501defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4502
// Base class for 2D-array surface stores of a single value.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs operands $idx, $x, $y and one `intype` value
// $r. The asm string is `inst` followed by "[$s, {$idx, $x, $y, $y}], {$r};".
// NOTE(review): $y is printed twice, giving a four-element coordinate vector.
// This is presumably padding because the PTX a2d geometry expects four
// elements with the last one ignored -- confirm against the PTX ISA before
// treating it as a typo.
4503class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
4504    : NVPTXInst<(outs),
4505                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4506                                intype:$r)),
4507                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
4508                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4509multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4510  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4511  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4512}
4513
// 2D-array single-value stores (asm geometry "a2d"). sust.b is instantiated
// for b8/b16/b32/b64 in the .clamp, .trap and .zero modes; sust.p only for
// b8/b16/b32 in .trap. b8 and b16 both use Int16Regs.
4514defm SUST_B_2D_ARRAY_B8_CLAMP
4515  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4516defm SUST_B_2D_ARRAY_B16_CLAMP
4517  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4518defm SUST_B_2D_ARRAY_B32_CLAMP
4519  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4520defm SUST_B_2D_ARRAY_B64_CLAMP
4521  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4522
4523defm SUST_B_2D_ARRAY_B8_TRAP
4524  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4525defm SUST_B_2D_ARRAY_B16_TRAP
4526  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4527defm SUST_B_2D_ARRAY_B32_TRAP
4528  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4529defm SUST_B_2D_ARRAY_B64_TRAP
4530  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4531
4532defm SUST_B_2D_ARRAY_B8_ZERO
4533  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4534defm SUST_B_2D_ARRAY_B16_ZERO
4535  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4536defm SUST_B_2D_ARRAY_B32_ZERO
4537  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4538defm SUST_B_2D_ARRAY_B64_ZERO
4539  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4540
4541defm SUST_P_2D_ARRAY_B8_TRAP
4542  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4543defm SUST_P_2D_ARRAY_B16_TRAP
4544  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4545defm SUST_P_2D_ARRAY_B32_TRAP
4546  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4547
// Base class for 2D-array surface stores of a two-element vector.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs operands $idx, $x, $y and two `intype` values
// $r, $g. The asm string is `inst` followed by
// "[$s, {$idx, $x, $y, $y}], {$r, $g};".
// NOTE(review): $y is printed twice, giving a four-element coordinate vector.
// This is presumably padding required by the PTX a2d geometry -- confirm
// against the PTX ISA before treating it as a typo.
4548class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4549    : NVPTXInst<(outs),
4550                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4551                                intype:$r, intype:$g)),
4552                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
4553                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4554multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4555  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4556  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4557}
4558
// 2D-array two-element-vector stores. sust.b is instantiated for
// b8/b16/b32/b64 in the .clamp, .trap and .zero modes; sust.p only for
// b8/b16/b32 in .trap. b8 and b16 both use Int16Regs.
4559defm SUST_B_2D_ARRAY_V2B8_CLAMP
4560  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4561defm SUST_B_2D_ARRAY_V2B16_CLAMP
4562  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4563defm SUST_B_2D_ARRAY_V2B32_CLAMP
4564  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4565defm SUST_B_2D_ARRAY_V2B64_CLAMP
4566  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4567
4568defm SUST_B_2D_ARRAY_V2B8_TRAP
4569  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4570defm SUST_B_2D_ARRAY_V2B16_TRAP
4571  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4572defm SUST_B_2D_ARRAY_V2B32_TRAP
4573  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4574defm SUST_B_2D_ARRAY_V2B64_TRAP
4575  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4576
4577defm SUST_B_2D_ARRAY_V2B8_ZERO
4578  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4579defm SUST_B_2D_ARRAY_V2B16_ZERO
4580  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4581defm SUST_B_2D_ARRAY_V2B32_ZERO
4582  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4583defm SUST_B_2D_ARRAY_V2B64_ZERO
4584  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4585
4586defm SUST_P_2D_ARRAY_V2B8_TRAP
4587  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4588defm SUST_P_2D_ARRAY_V2B16_TRAP
4589  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4590defm SUST_P_2D_ARRAY_V2B32_TRAP
4591  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4592
// Base class for 2D-array surface stores of a four-element vector.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs operands $idx, $x, $y and four `intype`
// values $r, $g, $b, $a. The asm string is `inst` followed by
// "[$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};".
// NOTE(review): $y is printed twice, giving a four-element coordinate vector.
// This is presumably padding required by the PTX a2d geometry -- confirm
// against the PTX ISA before treating it as a typo.
4593class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
4594    : NVPTXInst<(outs),
4595                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4596                                intype:$r, intype:$g, intype:$b, intype:$a)),
4597                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
4598                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4599multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4600  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4601  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4602}
4603
// 2D-array four-element-vector stores. Only b8/b16/b32 element widths are
// instantiated (no b64 v4 form appears here): sust.b in the .clamp, .trap and
// .zero modes, sust.p in .trap only. b8 and b16 both use Int16Regs.
4604defm SUST_B_2D_ARRAY_V4B8_CLAMP
4605  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4606defm SUST_B_2D_ARRAY_V4B16_CLAMP
4607  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4608defm SUST_B_2D_ARRAY_V4B32_CLAMP
4609  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4610
4611defm SUST_B_2D_ARRAY_V4B8_TRAP
4612  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4613defm SUST_B_2D_ARRAY_V4B16_TRAP
4614  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4615defm SUST_B_2D_ARRAY_V4B32_TRAP
4616  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4617
4618defm SUST_B_2D_ARRAY_V4B8_ZERO
4619  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4620defm SUST_B_2D_ARRAY_V4B16_ZERO
4621  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4622defm SUST_B_2D_ARRAY_V4B32_ZERO
4623  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4624
4625defm SUST_P_2D_ARRAY_V4B8_TRAP
4626  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4627defm SUST_P_2D_ARRAY_V4B16_TRAP
4628  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4629defm SUST_P_2D_ARRAY_V4B32_TRAP
4630  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4631
// Base class for 3D surface stores of a single value.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs coordinates $x, $y, $z and one `intype` value
// $r. The asm string is `inst` followed by "[$s, {$x, $y, $z, $z}], {$r};".
// NOTE(review): $z is printed twice, giving a four-element coordinate vector.
// This is presumably padding because the PTX 3d geometry expects four
// elements with the last one ignored -- confirm against the PTX ISA before
// treating it as a typo.
4632class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
4633    : NVPTXInst<(outs),
4634                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4635                                intype:$r)),
4636                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
4637                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4638multiclass SUST_3D<string inst, NVPTXRegClass intype> {
4639  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
4640  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
4641}
4642
// 3D single-value stores. sust.b is instantiated for b8/b16/b32/b64 in the
// .clamp, .trap and .zero modes; sust.p only for b8/b16/b32 in .trap.
// b8 and b16 both use Int16Regs.
4643defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
4644defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
4645defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
4646defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
4647
4648defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
4649defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
4650defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
4651defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
4652
4653defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
4654defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
4655defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
4656defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
4657
4658defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
4659defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
4660defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
4661
// Base class for 3D surface stores of a two-element vector.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs coordinates $x, $y, $z and two `intype`
// values $r, $g. The asm string is `inst` followed by
// "[$s, {$x, $y, $z, $z}], {$r, $g};".
// NOTE(review): $z is printed twice, giving a four-element coordinate vector.
// This is presumably padding required by the PTX 3d geometry -- confirm
// against the PTX ISA before treating it as a typo.
4662class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
4663    : NVPTXInst<(outs),
4664                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4665                                intype:$r, intype:$g)),
4666                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
4667                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4668multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
4669  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4670  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
4671}
4672
// 3D two-element-vector stores. sust.b is instantiated for b8/b16/b32/b64 in
// the .clamp, .trap and .zero modes; sust.p only for b8/b16/b32 in .trap.
// b8 and b16 both use Int16Regs.
4673defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
4674defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
4675defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
4676defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
4677
4678defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
4679defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
4680defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
4681defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
4682
4683defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
4684defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
4685defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
4686defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
4687
4688defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
4689defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
4690defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
4691
// Base class for 3D surface stores of a four-element vector.
// There are no outputs. The inputs are the `surf` operand dag concatenated
// (via !con) with the Int32Regs coordinates $x, $y, $z and four `intype`
// values $r, $g, $b, $a. The asm string is `inst` followed by
// "[$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};".
// NOTE(review): $z is printed twice, giving a four-element coordinate vector.
// This is presumably padding required by the PTX 3d geometry -- confirm
// against the PTX ISA before treating it as a typo.
4692class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4693    : NVPTXInst<(outs),
4694                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4695                                intype:$r, intype:$g, intype:$b, intype:$a)),
4696                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
4697                []>;
// Emits two records per instantiation: _R takes $s in Int64Regs, _I takes $s
// as an i64imm.
4698multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
4699  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4700  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
4701}
4702
// 3D four-element-vector stores. Only b8/b16/b32 element widths are
// instantiated (no b64 v4 form appears here): sust.b in the .clamp, .trap and
// .zero modes, sust.p in .trap only. b8 and b16 both use Int16Regs.
4703defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
4704defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
4705defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
4706
4707defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
4708defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
4709defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
4710
4711defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
4712defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
4713defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
4714
4715defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
4716defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
4717defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
4718
4719}
4720
4721// Surface store instruction patterns
4722// I'm not sure why we can't just include these in the instruction definitions,
4723// but TableGen complains of type errors :(
4724
4725// .clamp variant
// 1D .clamp stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_1d_*_clamp intrinsic into the register-handle (_R) variant
// of the matching SUST_B_1D_*_CLAMP instruction, forwarding the operands
// ($s, $x, then the value registers) in the same order. The i8 and i16
// intrinsics both take their values in Int16Regs.
4726def : Pat<(int_nvvm_sust_b_1d_i8_clamp
4727           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4728          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4729
4730def : Pat<(int_nvvm_sust_b_1d_i16_clamp
4731           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
4732          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
4733
4734def : Pat<(int_nvvm_sust_b_1d_i32_clamp
4735           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
4736          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
4737
4738def : Pat<(int_nvvm_sust_b_1d_i64_clamp
4739           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
4740          (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
4741
4742def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
4743           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4744          (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4745           Int16Regs:$r, Int16Regs:$g)>;
4746
4747def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
4748           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4749          (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4750           Int16Regs:$r, Int16Regs:$g)>;
4751
4752def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
4753           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4754          (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4755           Int32Regs:$r, Int32Regs:$g)>;
4756
4757def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
4758           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4759          (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4760           Int64Regs:$r, Int64Regs:$g)>;
4761
4762def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
4763           Int64Regs:$s, Int32Regs:$x,
4764           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4765          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4766           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4767
4768def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
4769           Int64Regs:$s, Int32Regs:$x,
4770           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4771          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4772           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4773
4774def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
4775           Int64Regs:$s, Int32Regs:$x,
4776           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4777          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
4778           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4779
4779
4780
4781
// 1D-array .clamp stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_1d_array_*_clamp intrinsic into the register-handle (_R)
// variant of the matching SUST_B_1D_ARRAY_*_CLAMP instruction, forwarding
// $s, the extra Int32Regs operand $l, $x and the value registers in the same
// order. The i8 and i16 intrinsics both take their values in Int16Regs.
4782def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
4783           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4784          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4785           Int16Regs:$r)>;
4786
4787def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
4788           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
4789          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4790           Int16Regs:$r)>;
4791
4792def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
4793           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
4794          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4795           Int32Regs:$r)>;
4796
4797def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
4798           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
4799          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4800           Int64Regs:$r)>;
4801
4802def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
4803          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4804          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4805           Int16Regs:$r, Int16Regs:$g)>;
4806
4807def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
4808          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
4809          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4810           Int16Regs:$r, Int16Regs:$g)>;
4811
4812def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
4813          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
4814          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4815           Int32Regs:$r, Int32Regs:$g)>;
4816
4817def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
4818          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
4819          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4820           Int64Regs:$r, Int64Regs:$g)>;
4821
4822def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
4823           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4824           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4825          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4826           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4827
4828def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
4829           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4830           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4831          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4832           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4833
4834def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
4835           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4836           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4837          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
4838           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4839
4840
4841
// 2D .clamp stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_2d_*_clamp intrinsic into the register-handle (_R) variant
// of the matching SUST_B_2D_*_CLAMP instruction, forwarding $s, $x, $y and
// the value registers in the same order. The i8 and i16 intrinsics both take
// their values in Int16Regs.
4842def : Pat<(int_nvvm_sust_b_2d_i8_clamp
4843           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4844          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4845           Int16Regs:$r)>;
4846
4847def : Pat<(int_nvvm_sust_b_2d_i16_clamp
4848           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4849          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4850           Int16Regs:$r)>;
4851
4852def : Pat<(int_nvvm_sust_b_2d_i32_clamp
4853           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4854          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4855           Int32Regs:$r)>;
4856
4857def : Pat<(int_nvvm_sust_b_2d_i64_clamp
4858           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4859          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4860           Int64Regs:$r)>;
4861
4862def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
4863          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
4864          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4865           Int16Regs:$r, Int16Regs:$g)>;
4866
4867def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
4868          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
4869          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4870           Int16Regs:$r, Int16Regs:$g)>;
4871
4872def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
4873          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
4874          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4875           Int32Regs:$r, Int32Regs:$g)>;
4876
4877def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
4878          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
4879          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4880           Int64Regs:$r, Int64Regs:$g)>;
4881
4882def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
4883           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4884           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4885          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4886           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4887
4888def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
4889           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4890           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4891          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4892           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4893
4894def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
4895           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4896           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4897          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
4898           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4899
4900
4901
// 2D-array .clamp stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_2d_array_*_clamp intrinsic into the register-handle (_R)
// variant of the matching SUST_B_2D_ARRAY_*_CLAMP instruction, forwarding
// $s, $l, $x, $y and the value registers in the same order. The pattern calls
// the first Int32Regs operand $l; the instruction classes declare that slot
// as $idx. The i8 and i16 intrinsics both take their values in Int16Regs.
4902def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
4903          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4904          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
4905           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4906           Int16Regs:$r)>;
4907
4908def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
4909          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
4910          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
4911           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4912           Int16Regs:$r)>;
4913
4914def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
4915          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
4916          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
4917           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4918           Int32Regs:$r)>;
4919
4920def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
4921          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
4922          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
4923           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4924           Int64Regs:$r)>;
4925
4926def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
4927           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4928           Int16Regs:$r, Int16Regs:$g),
4929          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4930           Int32Regs:$x, Int32Regs:$y,
4931           Int16Regs:$r, Int16Regs:$g)>;
4932
4933def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
4934           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4935           Int16Regs:$r, Int16Regs:$g),
4936          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4937           Int32Regs:$x, Int32Regs:$y,
4938           Int16Regs:$r, Int16Regs:$g)>;
4939
4940def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
4941           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
4942           Int32Regs:$g),
4943          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4944           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
4945
4946def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
4947           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
4948           Int64Regs:$g),
4949          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4950           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
4951
4952def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
4953           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4954           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4955          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
4956           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4957           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4958
4959def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
4960           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4961           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
4962          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
4963           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4964           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
4965
4966def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
4967           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
4968           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
4969          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
4970           Int32Regs:$x, Int32Regs:$y,
4971           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
4972
4973
4974
// 3D .clamp stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_3d_*_clamp intrinsic into the register-handle (_R) variant
// of the matching SUST_B_3D_*_CLAMP instruction, forwarding $s, $x, $y, $z
// and the value registers in the same order. The i8 and i16 intrinsics both
// take their values in Int16Regs.
4975def : Pat<(int_nvvm_sust_b_3d_i8_clamp
4976           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4977           Int16Regs:$r),
4978          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
4979           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4980           Int16Regs:$r)>;
4981
4982def : Pat<(int_nvvm_sust_b_3d_i16_clamp
4983           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4984           Int16Regs:$r),
4985          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
4986           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4987           Int16Regs:$r)>;
4988
4989def : Pat<(int_nvvm_sust_b_3d_i32_clamp
4990           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4991           Int32Regs:$r),
4992          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
4993           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4994           Int32Regs:$r)>;
4995
4996def : Pat<(int_nvvm_sust_b_3d_i64_clamp
4997           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
4998           Int64Regs:$r),
4999          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
5000           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5001           Int64Regs:$r)>;
5002
5003def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
5004           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5005           Int16Regs:$r, Int16Regs:$g),
5006          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
5007           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5008           Int16Regs:$r, Int16Regs:$g)>;
5009
5010def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
5011           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5012           Int16Regs:$r, Int16Regs:$g),
5013          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
5014           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5015           Int16Regs:$r, Int16Regs:$g)>;
5016
5017def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
5018           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5019           Int32Regs:$r, Int32Regs:$g),
5020          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
5021           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5022           Int32Regs:$r, Int32Regs:$g)>;
5023
5024def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
5025           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5026           Int64Regs:$r, Int64Regs:$g),
5027          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
5028           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5029           Int64Regs:$r, Int64Regs:$g)>;
5030
5031def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
5032           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5033           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5034          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
5035           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5036           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5037
5038def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
5039           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5040           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5041          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
5042           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5043           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5044
5045def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
5046           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5047           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5048          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
5049           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5050           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5051
5052
5053// .trap variant
// 1D .trap stores: each anonymous Pat rewrites one int_nvvm_sust_b_1d_*_trap
// intrinsic into the register-handle (_R) variant of the matching
// SUST_B_1D_*_TRAP instruction, forwarding $s, $x and the value registers in
// the same order. The i8 and i16 intrinsics both take their values in
// Int16Regs.
5054def : Pat<(int_nvvm_sust_b_1d_i8_trap
5055           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5056          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5057
5058def : Pat<(int_nvvm_sust_b_1d_i16_trap
5059           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5060          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
5061
5062def : Pat<(int_nvvm_sust_b_1d_i32_trap
5063           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5064          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
5065
5066def : Pat<(int_nvvm_sust_b_1d_i64_trap
5067           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5068          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
5069
5070def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
5071           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5072          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5073           Int16Regs:$r, Int16Regs:$g)>;
5074
5075def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
5076           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5077          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5078           Int16Regs:$r, Int16Regs:$g)>;
5079
5080def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
5081           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5082          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5083           Int32Regs:$r, Int32Regs:$g)>;
5084
5085def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
5086           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5087          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
5088           Int64Regs:$r, Int64Regs:$g)>;
5089
5090def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
5091           Int64Regs:$s, Int32Regs:$x,
5092           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5093          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
5094           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5095
5096def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
5097           Int64Regs:$s, Int32Regs:$x,
5098           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5099          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
5100           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5101
5102def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
5103           Int64Regs:$s, Int32Regs:$x,
5104           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5105          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
5106           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5107
5108
5109
// 1D-array .trap stores: each anonymous Pat rewrites one
// int_nvvm_sust_b_1d_array_*_trap intrinsic into the register-handle (_R)
// variant of the matching SUST_B_1D_ARRAY_*_TRAP instruction, forwarding
// $s, the extra Int32Regs operand $l, $x and the value registers in the same
// order. The i8 and i16 intrinsics both take their values in Int16Regs.
5110def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
5111           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5112          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5113           Int16Regs:$r)>;
5114
5115def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
5116           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5117          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5118           Int16Regs:$r)>;
5119
5120def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
5121           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5122          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5123           Int32Regs:$r)>;
5124
5125def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
5126           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5127          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5128           Int64Regs:$r)>;
5129
5130def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
5131          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5132          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5133           Int16Regs:$r, Int16Regs:$g)>;
5134
5135def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
5136          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5137          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5138           Int16Regs:$r, Int16Regs:$g)>;
5139
5140def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
5141          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5142          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5143           Int32Regs:$r, Int32Regs:$g)>;
5144
5145def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
5146          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5147          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5148           Int64Regs:$r, Int64Regs:$g)>;
5149
5150def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
5151           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5152           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5153          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5154           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5155
5156def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
5157           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5158           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5159          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5160           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
5161
5162def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
5163           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5164           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5165          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
5166           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5167
5168
5169
// sust.b.2d, .trap variant.
// Every int_nvvm_sust_b_2d_*_trap intrinsic is selected to the
// SUST_B_2D_*_TRAP_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x and $y (Int32Regs), then the data
// registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_V2B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_V4B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5227
5228
5229
// sust.b.2d.array, .trap variant.
// Every int_nvvm_sust_b_2d_array_*_trap intrinsic is selected to the
// SUST_B_2D_ARRAY_*_TRAP_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $l, $x and $y (Int32Regs), then the
// data registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_ARRAY_B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5300
5301
5302
// sust.b.3d, .trap variant.
// Every int_nvvm_sust_b_3d_*_trap intrinsic is selected to the
// SUST_B_3D_*_TRAP_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x, $y and $z (Int32Regs), then the
// data registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_3D_B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_3D_V2B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_3D_V4B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5379
5380
5381// .zero variant
// sust.b.1d, .zero variant.
// Every int_nvvm_sust_b_1d_*_zero intrinsic is selected to the
// SUST_B_1D_*_ZERO_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x (Int32Regs), then the data
// registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_1D_B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v2i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_1D_V2B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_v4i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_1D_V4B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5435
5436
5437
// sust.b.1d.array, .zero variant.
// Every int_nvvm_sust_b_1d_array_*_zero intrinsic is selected to the
// SUST_B_1D_ARRAY_*_ZERO_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $l and $x (Int32Regs), then the data
// registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_1D_ARRAY_B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v2i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V2B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_1d_array_v4i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_1D_ARRAY_V4B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5495
5496
5497
// sust.b.2d, .zero variant.
// Every int_nvvm_sust_b_2d_*_zero intrinsic is selected to the
// SUST_B_2D_*_ZERO_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x and $y (Int32Regs), then the data
// registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v2i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_V2B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_v4i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_V4B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5555
5556
5557
// sust.b.2d.array, .zero variant.
// Every int_nvvm_sust_b_2d_array_*_zero intrinsic is selected to the
// SUST_B_2D_ARRAY_*_ZERO_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $l, $x and $y (Int32Regs), then the
// data registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_2D_ARRAY_B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v2i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V2B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_2d_array_v4i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_2D_ARRAY_V4B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5628
5629
5630
// sust.b.3d, .zero variant.
// Every int_nvvm_sust_b_3d_*_zero intrinsic is selected to the
// SUST_B_3D_*_ZERO_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x, $y and $z (Int32Regs), then the
// data registers. The loops expand to these eleven patterns:
//   i8 i16 i32 i64, v2i8 v2i16 v2i32 v2i64, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32, 64] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs,
                        !eq(sz, 64): Int64Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r),
            (!cast<Instruction>("SUST_B_3D_B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v2i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_B_3D_V2B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g)>;
}

// Four data elements. No v4i64 pattern is defined for this group.
foreach sz = [8, 16, 32] in {
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_b_3d_v4i" # sz # "_zero")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_B_3D_V4B" # sz # "_ZERO_R")
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5707
5708
5709
5710
// sust.p.1d, .trap variant.
// Every int_nvvm_sust_p_1d_*_trap intrinsic is selected to the
// SUST_P_1D_*_TRAP_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $x (Int32Regs), then the data
// registers. The loop expands to these nine patterns (this group has no
// 64-bit forms):
//   i8 i16 i32, v2i8 v2i16 v2i32, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r),
            (!cast<Instruction>("SUST_P_1D_B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v2i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_P_1D_V2B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g)>;

  // Four data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_v4i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_P_1D_V4B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5755
5756
5757
// sust.p.1d.array, .trap variant.
// Every int_nvvm_sust_p_1d_array_*_trap intrinsic is selected to the
// SUST_P_1D_ARRAY_*_TRAP_R instruction of the same shape, with the operands
// forwarded unchanged: $s (Int64Regs), $l and $x (Int32Regs), then the data
// registers. The loop expands to these nine patterns (this group has no
// 64-bit forms):
//   i8 i16 i32, v2i8 v2i16 v2i32, v4i8 v4i16 v4i32.
foreach sz = [8, 16, 32] in {
  // 8-bit elements use Int16Regs, the same class as 16-bit elements.
  defvar DataRC = !cond(!le(sz, 16): Int16Regs,
                        !eq(sz, 32): Int32Regs);

  // One data element.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r),
            (!cast<Instruction>("SUST_P_1D_ARRAY_B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r)>;

  // Two data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v2i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V2B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g)>;

  // Four data elements.
  def : Pat<(!cast<Intrinsic>("int_nvvm_sust_p_1d_array_v4i" # sz # "_trap")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a),
            (!cast<Instruction>("SUST_P_1D_ARRAY_V4B" # sz # "_TRAP_R")
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               DataRC:$r, DataRC:$g, DataRC:$b, DataRC:$a)>;
}
5805
5806
5807
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each intrinsic is mapped onto the SUST_P_2D_*_TRAP_R instruction of the
// same element shape; operands ($s, then $x/$y, then the data registers) are
// forwarded unchanged and in the same order.

def : Pat<(int_nvvm_sust_p_2d_i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5855
5856
5857
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each intrinsic is mapped onto the SUST_P_2D_ARRAY_*_TRAP_R instruction of
// the same element shape; operands ($s, then $l/$x/$y, then the data
// registers) are forwarded unchanged and in the same order.

def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5916
5917
5918
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each intrinsic is mapped onto the SUST_P_3D_*_TRAP_R instruction of the
// same element shape; operands ($s, then $x/$y/$z, then the data registers)
// are forwarded unchanged and in the same order.

def : Pat<(int_nvvm_sust_p_3d_i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
              Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
5981
5982//-----------------------------------
5983// Read Special Registers
5984//-----------------------------------
5985
// Instruction template that copies the special register %<regname> into a
// 64-bit register with `mov.u64`. It is selected for the argument-less
// intrinsic `intop`.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>;
5990
// Instruction template that copies the special register %<regname> into a
// 32-bit register with `mov.u32`. It is selected for the argument-less
// intrinsic `intop`.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>;
5995
// TODO: Add vector versions of the special-register reads.
5997
// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
6006
// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
6015
// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
6022
// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
6031
// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
6040
// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;
6047
// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
6058
// %clock is read as a 32-bit value, %clock64 as a 64-bit value.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
6063
// %pm0 .. %pm3
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
6068
// The warp size is emitted as the constant WARP_SZ rather than as a
// %-prefixed special register, so it cannot be expressed with the
// PTX_READ_SREG_* templates (they always prepend '%' to the name).
// TODO: It would be nice to extend PTX_READ_SREG to handle the constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst), (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
6074
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record describing the fragment; its geom, frag and
//        ptx_elt_type are forwarded to the WMMA_REGS base class.
//   op - name of the operation the fragment is used with. It is only consulted
//        when choosing Predicates (compared against "mma" and "ldmatrix").
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  // There is deliberately no default arm: an element type that is not listed
  // here leaves the !cond without a matching case.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment: one register of
  // `regclass` per entry of the inherited `regs` list.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond picks the FIRST arm whose condition holds, so the order of the
  // arms below is significant (e.g. the f64 @ m8n8k4 arm must stay ahead of
  // the generic m8n8k4 arm, and the non-"mma" m8n8k32/m8n8k128 arms ahead of
  // the "mma" ones). Do not reorder without re-checking every overlap.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // Remaining (non-f64) element types @ m8n8k4. The f64 case has already
    // been consumed by the dedicated arm near the top of this list.
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // (A second "f64 @ m8n8k4" arm used to live here. It repeated the
    // condition and result of the arm near the top of the list verbatim and
    // could therefore never be selected, so it has been dropped.)

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output: one `regclass:$<name>`
  // operand per fragment register, under `outs` and `ins` respectively.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
6183
// Turns an instruction-input dag into the dag that matches a call to the
// intrinsic `Intr`. The rewrite is applied to every element of `Ins`:
//   ins     -> Intr      (the dag operator becomes the intrinsic)
//   MEMri   -> ADDRri
//   MEMri64 -> ADDRri64
//   imem    -> ADDRvar
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Substitutions are nested, so they are applied innermost-first.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6193
// PatFrag flavour of BuildPatternI: turns an instruction-input dag into the
// dag that matches the PatFrag `Intr`. The rewrite is applied to every
// element of `Ins`:
//   ins     -> Intr      (the dag operator becomes the PatFrag)
//   MEMri   -> ADDRri
//   MEMri64 -> ADDRri64
//   imem    -> ADDRvar
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Substitutions are nested, so they are applied innermost-first.
  dag ret = !foreach(elt, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, elt)))));
}
6203
// Base class shared by all MMA-family instructions. It carries the fields the
// pattern-building code needs:
//   _Intr - name of the intrinsic record the instruction implements.
//   _Args - list of `(ins ...)` dags that together form the input operands.
// Operand lists and the asm string are placeholders here; subclasses
// override them with `let`.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All argument dags concatenated, left to right, into one `(ins ...)` dag.
  dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
  // Pre-built pattern of the form (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}
6213
//
// wmma.load.[a|b|c].sync[.aligned].[row|col].<geom>[|.global|.shared].<type>
//
// Frag       - fragment being loaded; supplies outputs, predicates and the
//              frag/geom/type pieces of the asm string.
// Layout     - layout string emitted after ".sync".
// Space      - address-space suffix: ".shared", ".global", or "" (anything
//              other than the first two is matched as generic).
// WithStride - when set, an extra Int32Regs:$ldm operand is taken and printed.
// SrcOp      - operand kind used for the source address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // The load/store intrinsics are overloaded on the pointer's address space,
  // so matching goes through a PatFrag constrained to the wanted space.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...).
  dag PFOperands = !con((ops node:$src),
                        !if(WithStride, (ops node:$ldm), (ops)));
  dag PFOperandsIntr = !if(WithStride,
                           (Intr node:$src, node:$ldm),
                           (Intr node:$src));
  // PatFrag that only matches the requested address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                                        AS_match.generic))>;
  // Replace the generic intrinsic pattern with the AS-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx receives the PTX version and drives the "${ptx:aligned}" modifier.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load." # Frag.frag
                  # ".sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t" # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm;", ";");
}
6253
//
// wmma.store.d.sync[.aligned].[row|col].<geom>[|.global|.shared].<type>
//
// Frag       - fragment being stored; supplies the data inputs, predicates
//              and the geom/type pieces of the asm string.
// Layout     - layout string emitted after ".sync".
// Space      - address-space suffix: ".shared", ".global", or "" (anything
//              other than the first two is matched as generic).
// WithStride - when set, an extra Int32Regs:$ldm operand is taken and printed.
// DstOp      - operand kind used for the destination address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // The load/store intrinsics are overloaded on the pointer's address space,
  // so matching goes through a PatFrag constrained to the wanted space.
  // PFOperands mirrors the shape of Args using (ops node:$name, ...):
  // destination, one node per fragment register, then the optional stride.
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops,
                             !listsplat(node, !size(Frag.regs)),
                             Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag that only matches the requested address space. Its body is
  // PFOperands with the `ops` operator swapped for the intrinsic.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(elt, PFOperands, !subst(ops, Intr, elt)),
                             !if(!eq(Space, ".shared"), AS_match.shared,
                             !if(!eq(Space, ".global"), AS_match.global,
                                                        AS_match.generic))>;
  // Replace the generic intrinsic pattern with the AS-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // $ptx receives the PTX version and drives the "${ptx:aligned}" modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync${ptx:aligned}." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst]," # Frag.regstring
                  # !if(WithStride, ", $ldm;", ";");
}
6293
// Instantiate every supported WMMA load/store variant and collect the
// resulting records in MMA_LDSTs. The variants are the cross product of
// layout x stride x address space x address operand kind x fragment,
// filtered by NVVM_WMMA_LDST_SUPPORTED.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach lay = ["row", "col"] in
  foreach with_stride = [false, true] in
  foreach as = [".global", ".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
    // Loads.
    foreach ld_frag = NVVM_MMA_OPS.all_ld_ops in
      if NVVM_WMMA_LDST_SUPPORTED<ld_frag, lay>.ret then
        def : WMMA_LOAD<WMMA_REGINFO<ld_frag, "load">,
                        lay, as, with_stride, addr_op>;
    // Stores.
    foreach st_frag = NVVM_MMA_OPS.all_st_ops in
      if NVVM_WMMA_LDST_SUPPORTED<st_frag, lay>.ret then
        def : WMMA_STORE_D<WMMA_REGINFO<st_frag, "store">,
                           lay, as, with_stride, addr_op>;
  } // lay / with_stride / as / addr_op
} // defset
6311
// B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list; when b1op is ".and.popc" the predicates
// hasSM80 and hasPTX71 are appended to it.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret =
    !if(!eq(b1op, ".and.popc"),
        !listconcat(FragA.Predicates, [hasSM80, hasPTX71]),
        FragA.Predicates);
}
// WMMA.MMA
// wmma.mma[b1op].sync[.aligned].<alayout>.<blayout>.<geom>[.rnd]<types>
//   [.satfinite]  D, A, B, C;
//
// FragA/B/C are the inputs and FragD is the result fragment. `rnd` is an
// optional rounding-mode suffix ("" for none) and `b1op` an optional suffix
// emitted directly after "wmma.mma".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd,
               string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                         FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx receives the PTX version and drives the "${ptx:aligned}" modifier.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // For f16 A-fragments only the D and C types are spelled out; every other
  // variant lists D, A, B and C.
  string TypeList =
    !if(!eq(FragA.ptx_elt_type, "f16"),
        "." # !interleave([FragD.ptx_elt_type,
                           FragC.ptx_elt_type], "."),
        "." # !interleave([FragD.ptx_elt_type,
                           FragA.ptx_elt_type,
                           FragB.ptx_elt_type,
                           FragC.ptx_elt_type], "."));
  let AsmString = "wmma.mma" # b1op
                  # ".sync${ptx:aligned}." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "")
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
6355
// Instantiate every supported wmma.mma variant and collect the records in
// WMMAs. Variants are the cross product of A layout x B layout x satfinite x
// rounding mode x fragment tuple x b1 op, filtered by NVVM_WMMA_SUPPORTED.
// Each fragment tuple is indexed as [0]=A, [1]=B, [2]=C, [3]=D.
defset list<WMMA_INSTR> WMMAs = {
  foreach a_lay = ["row", "col"] in
  foreach b_lay = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach round = ["", "rn", "rz", "rm", "rp"] in
  foreach frags = NVVM_MMA_OPS.all_wmma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<frags>.ret in
    if NVVM_WMMA_SUPPORTED<frags, a_lay, b_lay, sat, round>.ret then
      def : WMMA_MMA<WMMA_REGINFO<frags[0], "wmma.mma">,
                     WMMA_REGINFO<frags[1], "wmma.mma">,
                     WMMA_REGINFO<frags[2], "wmma.mma">,
                     WMMA_REGINFO<frags[3], "wmma.mma">,
                     a_lay, b_lay, sat, round, bit_op>;
} // defset
6377
// MMA
// mma.sync.aligned.<geom>.<alayout>.<blayout>[.satfinite]<types>[b1op]
//   D, A, B, C;
//
// FragA/B/C are the inputs and FragD is the result fragment; `b1op` is an
// optional suffix emitted after the type list.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                        FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  // $ptx receives the PTX version; this asm string does not print it.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Element types in D, A, B, C order, each preceded by '.'.
  string TypeList = "." # !interleave([FragD.ptx_elt_type,
                                       FragA.ptx_elt_type,
                                       FragB.ptx_elt_type,
                                       FragC.ptx_elt_type], ".");
  let AsmString = "mma.sync.aligned." # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op
                  # "\n\t\t" # FragD.regstring
                  # ",\n\t\t" # FragA.regstring
                  # ",\n\t\t" # FragB.regstring
                  # ",\n\t\t" # FragC.regstring
                  # ";";
}
6405
// Instantiate every supported mma variant and collect the records in MMAs.
// Variants are the cross product of A layout x B layout x satfinite x
// fragment tuple x b1 op, filtered by NVVM_MMA_SUPPORTED.
// Each fragment tuple is indexed as [0]=A, [1]=B, [2]=C, [3]=D.
defset list<WMMA_INSTR> MMAs = {
  foreach a_lay = ["row", "col"] in
  foreach b_lay = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach frags = NVVM_MMA_OPS.all_mma_ops in
  foreach bit_op = NVVM_MMA_B1OPS<frags>.ret in
    if NVVM_MMA_SUPPORTED<frags, a_lay, b_lay, sat>.ret then
      def : MMA<WMMA_REGINFO<frags[0], "mma">,
                WMMA_REGINFO<frags[1], "mma">,
                WMMA_REGINFO<frags[2], "mma">,
                WMMA_REGINFO<frags[3], "mma">,
                a_lay, b_lay, sat, bit_op>;
} // defset
6425
//
// ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<type>
//
// Frag       - fragment being loaded; supplies outputs, predicates and the
//              geom/frag/type pieces of the asm string.
// Transposed - when set, ".trans" is added to the mnemonic.
// Space      - address-space suffix; ".shared" selects the shared-memory
//              matcher, anything else the generic one.
// SrcOp      - operand kind used for the source address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag that only matches the requested address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src),
                             (Intr node:$src),
                             !if(!eq(Space, ".shared"),
                                 AS_match.shared,
                                 AS_match.generic)>;
  // Replace the generic intrinsic pattern with the AS-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // $ptx receives the PTX version; this asm string does not print it.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned." # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring
                  # ", [$src];";
}
6450
// Instantiate every supported ldmatrix variant and collect the records in
// LDMATRIXs. Variants are the cross product of transposed x address space x
// address operand kind x fragment, filtered by NVVM_LDMATRIX_SUPPORTED.
defset list<WMMA_INSTR> LDMATRIXs = {
  foreach trans = [false, true] in
  foreach as = [".shared", ""] in
  foreach addr_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
  foreach ld_frag = NVVM_MMA_OPS.all_ldmatrix_ops in
    if NVVM_LDMATRIX_SUPPORTED<ld_frag>.ret then
      def : LDMATRIX<WMMA_REGINFO<ld_frag, "ldmatrix">,
                     trans, as, addr_op>;
} // defset
6464
// Selection pattern for one MMA-family instruction `wi`:
//   source: wi.IntrinsicPattern
//   result: (wi <wi.Args operands...>, ptx.version)
// and the pattern requires wi.Predicates.
//
// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(elt, wi.Args,
                          !subst(ins, wi, elt)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;
6473
// Emit one intrinsic->instruction pattern for every record collected in the
// MMAs, WMMAs, MMA_LDSTs and LDMATRIXs defsets, in that order.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<inst>;

6477