xref: /freebsd/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
20b57cec5SDimitry Andric//
30b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric//
70b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 0.0f.
def immFloat0 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 0.0f;
}]>;
130b57cec5SDimitry Andric
// Matches a floating-point immediate whose value, read as a float, compares
// equal to 1.0f.
def immFloat1 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 1.0f;
}]>;
180b57cec5SDimitry Andric
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 0.0.
def immDouble0 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 0.0;
}]>;
230b57cec5SDimitry Andric
// Matches a floating-point immediate whose value, read as a double, compares
// equal to 1.0.
def immDouble1 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 1.0;
}]>;
280b57cec5SDimitry Andric
// Bundle of predicate code fragments, one per address space. Each fragment
// returns the result of ChkMemSDNodeAddressSpace for the node N and the
// corresponding llvm::ADDRESS_SPACE_* constant.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];

  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];

  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}
400b57cec5SDimitry Andric
// Provides `ptx.version`: a dag whose (i32 0) placeholder operand is rewritten
// into an i32 immediate holding the PTX version currently in use.
class PTX {
  // Transform that discards the matched immediate and produces
  // Subtarget->getPTXVersion() as an i32 immediate at the node's location.
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;

  // The (i32 0) here is only a placeholder for PTXVerXform to replace.
  dag version = (PTXVerXform (i32 0));
}

def ptx : PTX;
500b57cec5SDimitry Andric
// Generates a list of n sequential register names.
// E.g. RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
class RegSeq<int n, string prefix> {
  // Built recursively: the names for the first n-1 registers followed by
  // prefix # (n-1). n == 0 terminates the recursion with an empty list.
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}
580b57cec5SDimitry Andric
// Lists the threadmask_imm values the shuffle instantiation loop iterates
// over: both false and true when sync is set, only false otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [false, true], [false]);
}
628bcb0991SDimitry Andric
630b57cec5SDimitry Andric//-----------------------------------
640b57cec5SDimitry Andric// Synchronization and shuffle functions
650b57cec5SDimitry Andric//-----------------------------------
66e8d8bef9SDimitry Andriclet isConvergent = true in {
// int_nvvm_barrier0: emits bar.sync with the literal operand 0.
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;

// int_nvvm_barrier_n: emits bar.sync with one register operand.
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;

// int_nvvm_barrier: emits bar.sync with two register operands.
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// int_nvvm_barrier0_popc: inside a scoped block, sets the scratch predicate
// %p1 to ($pred != 0) and feeds it to bar.red.popc.u32 with barrier operand 0,
// which writes $dst.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
                "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// int_nvvm_barrier0_and: inside a scoped block, sets %p1 to ($pred != 0),
// reduces it with bar.red.and.pred (barrier operand 0) into %p2, and then
// selects 1 or 0 into $dst depending on %p2.
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
// int_nvvm_barrier0_or: inside a scoped block, sets %p1 to ($pred != 0),
// reduces it with bar.red.or.pred (barrier operand 0) into %p2, and then
// selects 1 or 0 into $dst depending on %p2.
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t" #
                ".reg .pred \t%p1; \n\t" #
                ".reg .pred \t%p2; \n\t" #
                "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
                "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
                "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
                "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;
1010b57cec5SDimitry Andric
// int_nvvm_bar_sync: bar.sync with an immediate operand.
def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;
1040b57cec5SDimitry Andric
// int_nvvm_bar_warp_sync: bar.warp.sync with the operand given either as an
// immediate (_I) or in a register (_R). Both forms are gated on hasPTX<60> and
// hasSM<30>.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
1110b57cec5SDimitry Andric
// int_nvvm_barrier_sync: barrier.sync with the operand given either as an
// immediate (_I) or in a register (_R). Both forms are gated on hasPTX<60> and
// hasSM<30>.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
1180b57cec5SDimitry Andric
// int_nvvm_barrier_sync_cnt: barrier.sync with two operands, $id and $cnt.
// The two-letter suffix gives the form of $id and then of $cnt
// (R = Int32Regs register, I = i32 immediate). All four forms are gated on
// hasPTX<60> and hasSM<30>.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;
1357a6dacacSDimitry Andric
// Operand-less "barrier.cluster.<variant>;" instruction selected for the
// intrinsic Intr. Preds defaults to [hasPTX<78>, hasSM<90>] and can be
// overridden per instruction.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>
    : NVPTXInst<(outs), (ins),
                !strconcat("barrier.cluster.", variant, ";"),
                [(Intr)]>,
      Requires<Preds>;
14006c3fb27SDimitry Andric
// Cluster barrier instructions. The arrive.relaxed forms override the default
// predicates with [hasPTX<80>, hasSM<90>]; the others use the class default.
def barrier_cluster_arrive
    : INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed
    : INT_BARRIER_CLUSTER<"arrive.relaxed",
                          int_nvvm_barrier_cluster_arrive_relaxed,
                          [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait
    : INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// The '.aligned' counterparts of the three instructions above.
def barrier_cluster_arrive_aligned
    : INT_BARRIER_CLUSTER<"arrive.aligned",
                          int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned
    : INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
                          int_nvvm_barrier_cluster_arrive_relaxed_aligned,
                          [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned
    : INT_BARRIER_CLUSTER<"wait.aligned",
                          int_nvvm_barrier_cluster_wait_aligned>;
1577a6dacacSDimitry Andric
// One shfl / shfl.sync instruction variant.
//
//   sync           - use the shfl.sync form, which adds a $threadmask operand.
//   mode           - shuffle mode string spliced into the mnemonic and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; picks the data register class.
//   return_pred    - also produce an Int1Regs $pred result.
//   offset_imm     - $offset is an immediate rather than a register.
//   mask_imm       - $mask is an immediate rather than a register.
//   threadmask_imm - $threadmask is an immediate rather than a register
//                    (only consulted when sync is set).
//
// The base class is given placeholder operands, asm string and pattern; the
// real ones are computed from the parameters and installed with `let` below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Register class of $src and $dst.
  NVPTXRegClass rc = !cond(!eq(reg, "i32"): Int32Regs,
                           !eq(reg, "f32"): Float32Regs);

  // int_nvvm_shfl_[sync_]<mode>_<reg>[p], resolved to the intrinsic record.
  string IntrName = !strconcat("int_nvvm_shfl_",
                               !if(sync, "sync_", ""),
                               mode,
                               "_", reg,
                               !if(return_pred, "p", ""));
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Inputs, in order: [$threadmask,] $src, $offset, $mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]));

  // Outputs: $dst, plus $pred when a predicate result is requested.
  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  // shfl.[sync.]<mode>.b32 $dst[|$pred], $src, $offset, $mask[, $threadmask];
  let AsmString = !strconcat("shfl.",
                             !if(sync, "sync.", ""),
                             mode, ".b32\t$dst",
                             !if(return_pred, "|$pred", ""),
                             ", $src, $offset, $mask",
                             !if(sync, ", $threadmask", ""),
                             ";");

  // The selection pattern is derived from the operand lists: the output list
  // with `outs` renamed to `set`, concatenated with a `set` node wrapping the
  // input list with `ins` renamed to the intrinsic. In both lists i32imm is
  // replaced by imm.
  let Pattern = [!con(
    !foreach(tmp, OutOperandList,
             !subst(outs, set, !subst(i32imm, imm, tmp))),
    (set !foreach(tmp, InOperandList,
                  !subst(ins, Intr, !subst(i32imm, imm, tmp)))))];
}
1970b57cec5SDimitry Andric
// Instantiate SHFL_INSTR for every combination of non-sync/sync form, shuffle
// mode, register type, optional predicate result, and register-vs-immediate
// form of $offset and $mask. THREADMASK_INFO<sync>.ret supplies the
// $threadmask forms to iterate over for the current value of sync.
// The sync form is gated on hasSM<30> and hasPTX<60>; the non-sync form on
// hasSM<30> and hasSHFL.
foreach sync = [false, true] in
foreach mode = ["up", "down", "bfly", "idx"] in
foreach regclass = ["i32", "f32"] in
foreach return_pred = [false, true] in
foreach offset_imm = [false, true] in
foreach mask_imm = [false, true] in
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in
  def : SHFL_INSTR<sync, mode, regclass, return_pred,
                   offset_imm, mask_imm, threadmask_imm>,
        Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
2150b57cec5SDimitry Andric
// vote.{all,any,uni,ballot}
//
// Defines one anonymous instruction "vote.<mode> $dest, $pred;" that takes an
// Int1Regs predicate and writes a result of class `regclass`, selected for
// the intrinsic IntOp. Gated on hasPTX<60> and hasSM<30>.
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;
2280b57cec5SDimitry Andric
// vote.sync.{all,any,uni,ballot}
//
// Defines two instructions "vote.sync.<mode> $dest, $pred, $mask;" that differ
// only in how $mask is supplied: `i` takes an immediate, `r` a register.
// Both are gated on hasPTX<60> and hasSM<30>.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Asm string shared by both forms.
  defvar AsmStr = "vote.sync." # mode # " \t$dest, $pred, $mask;";

  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    AsmStr,
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;

  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    AsmStr,
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;
2450b57cec5SDimitry Andric
// match.any.sync.<ptxtype>: one instruction per immediate/register form of the
// $value and $mask operands. In the two-letter suffix the first letter
// describes $value and the second $mask (i = immediate, r = register).
// `regclass` is the register class of $value and ImmOp its immediate operand
// type; $dest and a register $mask are always Int32Regs.
// All forms are gated on hasPTX<60> and hasSM<70>.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  // Asm string shared by all four forms.
  defvar AsmStr = "match.any.sync." # ptxtype # " \t$dest, $value, $mask;";

  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
2650b57cec5SDimitry Andric
// activemask.b32: no inputs; writes the result of int_nvvm_activemask to a
// 32-bit register. Gated on hasPTX<62> and hasSM<30>.
def ACTIVEMASK
    : NVPTXInst<(outs Int32Regs:$dest), (ins),
                "activemask.b32 \t$dest;",
                [(set Int32Regs:$dest, (int_nvvm_activemask))]>,
      Requires<[hasPTX<62>, hasSM<30>]>;
271*0fca6ea1SDimitry Andric
// 32-bit and 64-bit value instantiations of MATCH_ANY_SYNC.
defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;
2760b57cec5SDimitry Andric
// match.all.sync.<ptxtype> with both a 32-bit result ($dest) and a predicate
// result ($pred). One instruction per immediate/register form of the $value
// and $mask operands; in the two-letter suffix the first letter describes
// $value and the second $mask (i = immediate, r = register).
// All forms are gated on hasPTX<60> and hasSM<70>.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  // Asm string shared by all four forms.
  defvar AsmStr = "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;";

  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;

  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     AsmStr,
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;
3040b57cec5SDimitry Andric
// redux.sync.<BinOp>.<PTXType>: one anonymous instruction with register
// operands $src and $mask and a 32-bit register result, selected for the
// intrinsic Intrin. Gated on hasPTX<70> and hasSM<80>.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  !strconcat("redux.sync.", BinOp, ".", PTXType,
                             " $dst, $src, $mask;"),
                  [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;
320fe6060f1SDimitry Andric
321e8d8bef9SDimitry Andric} // isConvergent = true
3220b57cec5SDimitry Andric
3230b57cec5SDimitry Andric//-----------------------------------
3240b57cec5SDimitry Andric// Explicit Memory Fence Functions
3250b57cec5SDimitry Andric//-----------------------------------
// Operand-less fence instruction: prints StrOp verbatim and is selected for
// the intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
3330b57cec5SDimitry Andric
// fence.sc.cluster, built on the MEMBAR class and gated on hasPTX<78> and
// hasSM<90>.
def INT_FENCE_SC_CLUSTER
    : MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
      Requires<[hasPTX<78>, hasSM<90>]>;
3370b57cec5SDimitry Andric
3380b57cec5SDimitry Andric//-----------------------------------
339fe6060f1SDimitry Andric// Async Copy Functions
340fe6060f1SDimitry Andric//-----------------------------------
341fe6060f1SDimitry Andric
// cp.async.mbarrier.arrive<NoInc><AddrSpace>.b64 [$addr];
// Defines a _32 form taking $addr in Int32Regs and a _64 form taking it in
// Int64Regs. Both are gated on hasPTX<70> and hasSM<80>.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  // Asm string shared by both forms.
  defvar AsmStr = "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      AsmStr,
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      AsmStr,
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
361fe6060f1SDimitry Andric
// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
//
//   _32 / _64     - two-operand form (intrinsic Intrin) with $dst and $src in
//                   Int32Regs / Int64Regs.
//   _32s / _64s   - form with an extra $src_size operand (intrinsic IntrinS),
//                   $src_size in a register.
//   _32si / _64si - same as above with $src_size as an immediate.
//
// All forms are gated on hasPTX<70> and hasSM<80>.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  // Common prefix of every asm string, up to and including the copy size.
  defvar AsmHead = "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize;
  defvar AsmPlain = AsmHead # ";";
  defvar AsmSized = AsmHead # ", $src_size;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      AsmPlain,
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      AsmPlain,
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  // Variants with the src_size parameter.
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _32si : NVPTXInst<(outs),
                        (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                        AsmSized,
                        [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;

  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
                       AsmSized,
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;

  def _64si : NVPTXInst<(outs),
                        (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                        AsmSized,
                        [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
}
389fe6060f1SDimitry Andric
// Instantiations of CP_ASYNC_SHARED_GLOBAL_I: "ca" with sizes 4, 8 and 16,
// and "cg" with size 16. The second intrinsic of each pair is the `_s`
// (src_size) variant.
defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                               int_nvvm_cp_async_ca_shared_global_4,
                               int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                               int_nvvm_cp_async_ca_shared_global_8,
                               int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                               int_nvvm_cp_async_ca_shared_global_16,
                               int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                               int_nvvm_cp_async_cg_shared_global_16,
                               int_nvvm_cp_async_cg_shared_global_16_s>;
405fe6060f1SDimitry Andric
// cp.async group management. All three are gated on hasPTX<70> and hasSM<80>.

// cp.async.commit_group: no operands.
def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_group: $n is matched as an i32 target immediate (timm).
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.wait_all: no operands.
def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX<70>, hasSM<80>]>;
419fe6060f1SDimitry Andric
// cp.async.bulk variants of the commit/wait group instructions. All three are
// gated on hasPTX<80> and hasSM<90>; in the wait forms $n is matched as an i32
// target immediate (timm).
def CP_ASYNC_BULK_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.bulk.commit_group;",
                [(int_nvvm_cp_async_bulk_commit_group)]>,
      Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.bulk.wait_group $n;",
                [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP_READ
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.bulk.wait_group.read $n;",
                [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
      Requires<[hasPTX<80>, hasSM<90>]>;
4357a6dacacSDimitry Andric
436fe6060f1SDimitry Andric//-----------------------------------
437fe6060f1SDimitry Andric// MBarrier Functions
438fe6060f1SDimitry Andric//-----------------------------------
439fe6060f1SDimitry Andric
// mbarrier.init<AddrSpace>.b64 [$addr], $count;
// Defines a _32 form taking $addr in Int32Regs and a _64 form taking it in
// Int64Regs; $count is always Int32Regs. Both are gated on hasPTX<70> and
// hasSM<80>.
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  // Asm string shared by both forms.
  defvar AsmStr = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";

  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;

  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      AsmStr,
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT        : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;
454fe6060f1SDimitry Andric
// mbarrier.inval: no result; the only operand is the barrier address.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr. Both require hasPTX<70> and
// hasSM<80>.
455fe6060f1SDimitry Andricmulticlass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
456fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
457fe6060f1SDimitry Andric           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
458fe6060f1SDimitry Andric    [(Intrin Int32Regs:$addr)]>,
45906c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
460fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
461fe6060f1SDimitry Andric           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
462fe6060f1SDimitry Andric    [(Intrin Int64Regs:$addr)]>,
46306c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
464fe6060f1SDimitry Andric}
465fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
466fe6060f1SDimitry Andricdefm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
467fe6060f1SDimitry Andricdefm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
468fe6060f1SDimitry Andric                                            int_nvvm_mbarrier_inval_shared>;
469fe6060f1SDimitry Andric
// mbarrier.arrive: produces a 64-bit $state result (Int64Regs) from the
// barrier address.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr; the result class is the same in
// both. Both require hasPTX<70> and hasSM<80>.
470fe6060f1SDimitry Andricmulticlass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
471fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
472fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
473fe6060f1SDimitry Andric    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
47406c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
475fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
476fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
477fe6060f1SDimitry Andric    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
47806c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
479fe6060f1SDimitry Andric}
480fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
481fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
482fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_SHARED :
483fe6060f1SDimitry Andric  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;
484fe6060f1SDimitry Andric
// mbarrier.arrive.noComplete: like MBARRIER_ARRIVE but with an additional
// Int32Regs $count operand; still produces an Int64Regs $state result.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr. Both require hasPTX<70> and
// hasSM<80>.
485fe6060f1SDimitry Andricmulticlass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
486fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs Int64Regs:$state),
487fe6060f1SDimitry Andric           (ins Int32Regs:$addr, Int32Regs:$count),
488fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
489fe6060f1SDimitry Andric                      ".b64 $state, [$addr], $count;"),
490fe6060f1SDimitry Andric    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
49106c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
492fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs Int64Regs:$state),
493fe6060f1SDimitry Andric           (ins Int64Regs:$addr, Int32Regs:$count),
494fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
495fe6060f1SDimitry Andric                      ".b64 $state, [$addr], $count;"),
496fe6060f1SDimitry Andric    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
49706c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
498fe6060f1SDimitry Andric}
499fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
500fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_NOCOMPLETE :
501fe6060f1SDimitry Andric  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
502fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
503fe6060f1SDimitry Andric  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;
504fe6060f1SDimitry Andric
// mbarrier.arrive_drop: same operand shape as MBARRIER_ARRIVE (address in,
// Int64Regs $state out), but prints the "arrive_drop" mnemonic.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr. Both require hasPTX<70> and
// hasSM<80>.
505fe6060f1SDimitry Andricmulticlass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
506fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
507fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive_drop", AddrSpace,
508fe6060f1SDimitry Andric                      ".b64 $state, [$addr];"),
509fe6060f1SDimitry Andric           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
51006c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
511fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
512fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive_drop", AddrSpace,
513fe6060f1SDimitry Andric                      ".b64 $state, [$addr];"),
514fe6060f1SDimitry Andric           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
51506c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
516fe6060f1SDimitry Andric}
517fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
518fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_DROP :
519fe6060f1SDimitry Andric  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
520fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_DROP_SHARED :
521fe6060f1SDimitry Andric  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;
522fe6060f1SDimitry Andric
// mbarrier.arrive_drop.noComplete: address plus an Int32Regs $count in,
// Int64Regs $state out.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr. Both require hasPTX<70> and
// hasSM<80>.
523fe6060f1SDimitry Andricmulticlass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
524fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs Int64Regs:$state),
525fe6060f1SDimitry Andric           (ins Int32Regs:$addr, Int32Regs:$count),
526fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
527fe6060f1SDimitry Andric                      ".b64 $state, [$addr], $count;"),
528fe6060f1SDimitry Andric           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
52906c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
530fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs Int64Regs:$state),
531fe6060f1SDimitry Andric           (ins Int64Regs:$addr, Int32Regs:$count),
532fe6060f1SDimitry Andric           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
533fe6060f1SDimitry Andric                      ".b64 $state, [$addr], $count;"),
534fe6060f1SDimitry Andric           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
53506c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
536fe6060f1SDimitry Andric}
537fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
538fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
539fe6060f1SDimitry Andric  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
540fe6060f1SDimitry Andricdefm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
541fe6060f1SDimitry Andric  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
542fe6060f1SDimitry Andric                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;
543fe6060f1SDimitry Andric
// mbarrier.test_wait: takes the barrier address and an Int64Regs $state and
// produces a predicate result $res in Int1Regs.
//   AddrSpace - text spliced into the mnemonic before ".b64".
//   Intrin    - intrinsic matched by the selection pattern.
// _32/_64 pick Int32Regs/Int64Regs for $addr. Both require hasPTX<70> and
// hasSM<80>.
544fe6060f1SDimitry Andricmulticlass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
545fe6060f1SDimitry Andric  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
546fe6060f1SDimitry Andric           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
547fe6060f1SDimitry Andric           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
54806c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
549fe6060f1SDimitry Andric  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
550fe6060f1SDimitry Andric           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
551fe6060f1SDimitry Andric           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
55206c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
553fe6060f1SDimitry Andric}
554fe6060f1SDimitry Andric
// Instantiations: one with no address-space suffix and one for ".shared".
555fe6060f1SDimitry Andricdefm MBARRIER_TEST_WAIT :
556fe6060f1SDimitry Andric  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
557fe6060f1SDimitry Andricdefm MBARRIER_TEST_WAIT_SHARED :
558fe6060f1SDimitry Andric  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;
559fe6060f1SDimitry Andric
// mbarrier.pending_count: takes an Int64Regs $state (no address operand, so no
// _32/_64 split or address-space suffix) and produces an Int32Regs $res.
//   Intrin - intrinsic matched by the selection pattern.
// Requires hasPTX<70> and hasSM<80>.
560fe6060f1SDimitry Andricclass MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
561fe6060f1SDimitry Andric           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
562fe6060f1SDimitry Andric           "mbarrier.pending_count.b64 $res, $state;",
563fe6060f1SDimitry Andric           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
56406c3fb27SDimitry Andric    Requires<[hasPTX<70>, hasSM<80>]>;
565fe6060f1SDimitry Andric
// The single instantiation; note the record shares its name with the class.
566fe6060f1SDimitry Andricdef MBARRIER_PENDING_COUNT :
567fe6060f1SDimitry Andric  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
568fe6060f1SDimitry Andric
569fe6060f1SDimitry Andric//-----------------------------------
5700b57cec5SDimitry Andric// Math Functions
5710b57cec5SDimitry Andric//-----------------------------------
5720b57cec5SDimitry Andric
5730b57cec5SDimitry Andric// Map min(1.0, max(0.0, x)) to sat(x).
5740b57cec5SDimitry Andric// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
5760b57cec5SDimitry Andric// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
5770b57cec5SDimitry Andric// Same story for fmax, fmin.
5780b57cec5SDimitry Andric
// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders (the constant may be
// on either side of both the inner fmax and the outer fmin) is selected as
// CVT_f32_f32 with the CvtSAT modifier. immFloat0/immFloat1 are the PatLeafs
// from the top of this file that match fp immediates equal to 0.0f/1.0f.
5790b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_f immFloat1,
5800b57cec5SDimitry Andric            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
5810b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
5820b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_f immFloat1,
5830b57cec5SDimitry Andric            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
5840b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
5850b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_f
5860b57cec5SDimitry Andric            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
5870b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
5880b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_f
5890b57cec5SDimitry Andric            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
5900b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
5910b57cec5SDimitry Andric
// f64: the same four operand orders, using immDouble0/immDouble1 and
// CVT_f64_f64 with CvtSAT.
5920b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_d immDouble1,
5930b57cec5SDimitry Andric            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
5940b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
5950b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_d immDouble1,
5960b57cec5SDimitry Andric            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
5970b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
5980b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_d
5990b57cec5SDimitry Andric            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
6000b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
6010b57cec5SDimitry Andricdef : Pat<(int_nvvm_fmin_d
6020b57cec5SDimitry Andric            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
6030b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
6040b57cec5SDimitry Andric
6050b57cec5SDimitry Andric
6060b57cec5SDimitry Andric// We need a full string for OpcStr here because we need to deal with case like
6070b57cec5SDimitry Andric// INT_PTX_RECIP.
// One-operand intrinsic instruction: $dst = IntOP($src0).
//   OpcStr          - complete asm string, operands included.
//   target_regclass - register class of the $dst result.
//   src_regclass    - register class of the $src0 input.
//   IntOP           - intrinsic matched by the selection pattern.
//   Preds           - predicates passed to Requires<>; empty by default.
6080b57cec5SDimitry Andricclass F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
60981ad6265SDimitry Andric  NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
6100b57cec5SDimitry Andric            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
6110b57cec5SDimitry Andric            OpcStr,
61281ad6265SDimitry Andric        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
61381ad6265SDimitry Andric        Requires<Preds>;
6140b57cec5SDimitry Andric
6150b57cec5SDimitry Andric// We need a full string for OpcStr here because we need to deal with the case
6160b57cec5SDimitry Andric// like INT_PTX_NATIVE_POWR_F.
// Two-operand intrinsic instruction: $dst = IntOP($src0, $src1).
//   OpcStr      - complete asm string, operands included.
//   t_regclass  - register class of the $dst result.
//   s0_regclass - register class of $src0.
//   s1_regclass - register class of $src1.
//   IntOP       - intrinsic matched by the selection pattern.
//   Preds       - predicates passed to Requires<>; empty by default.
6170b57cec5SDimitry Andricclass F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
61881ad6265SDimitry Andric  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
61981ad6265SDimitry Andric  list<Predicate> Preds = []>
6200b57cec5SDimitry Andric            : NVPTXInst<(outs t_regclass:$dst),
6210b57cec5SDimitry Andric              (ins s0_regclass:$src0, s1_regclass:$src1),
6220b57cec5SDimitry Andric            OpcStr,
62381ad6265SDimitry Andric        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
62481ad6265SDimitry Andric        Requires<Preds>;
6250b57cec5SDimitry Andric
// Three-operand intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
//   OpcStr                - complete asm string, operands included.
//   t_regclass            - register class of the $dst result.
//   s0/s1/s2_regclass     - register classes of $src0, $src1 and $src2.
//   IntOP                 - intrinsic matched by the selection pattern.
//   Preds                 - predicates passed to Requires<>; empty by default.
6260b57cec5SDimitry Andricclass F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
6270b57cec5SDimitry Andric  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
62881ad6265SDimitry Andric  NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
6290b57cec5SDimitry Andric            : NVPTXInst<(outs t_regclass:$dst),
6300b57cec5SDimitry Andric              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
6310b57cec5SDimitry Andric            OpcStr,
6320b57cec5SDimitry Andric        [(set t_regclass:$dst,
63381ad6265SDimitry Andric          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
63481ad6265SDimitry Andric          Requires<Preds>;
6350b57cec5SDimitry Andric
6360b57cec5SDimitry Andric//
6370b57cec5SDimitry Andric// MISC
6380b57cec5SDimitry Andric//
6390b57cec5SDimitry Andric
// prmt.b32 for int_nvvm_prmt: result and all three sources are Int32Regs.
// No predicates are passed, so F_MATH_3's empty default applies.
6400b57cec5SDimitry Andricdef INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
6410b57cec5SDimitry Andric  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;
6420b57cec5SDimitry Andric
// nanosleep.u32 for int_nvvm_nanosleep, in two operand forms that print the
// same asm string: _I matches an immediate argument (imm), _R a value in
// Int32Regs. Neither has a result. Both require hasPTX<63> and hasSM<70>.
643*0fca6ea1SDimitry Andricdef INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$i;",
644*0fca6ea1SDimitry Andric                             [(int_nvvm_nanosleep imm:$i)]>,
645*0fca6ea1SDimitry Andric        Requires<[hasPTX<63>, hasSM<70>]>;
646*0fca6ea1SDimitry Andricdef INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;",
647*0fca6ea1SDimitry Andric                             [(int_nvvm_nanosleep Int32Regs:$i)]>,
648*0fca6ea1SDimitry Andric        Requires<[hasPTX<63>, hasSM<70>]>;
6490b57cec5SDimitry Andric//
6500b57cec5SDimitry Andric// Min Max
6510b57cec5SDimitry Andric//
6520b57cec5SDimitry Andric
// f32 min variants, all F_MATH_2 over Float32Regs. The plain and .ftz forms
// pass no predicates; the .NaN forms require hasPTX<70>/hasSM<80>; every
// .xorsign.abs form requires hasPTX<72>/hasSM<86>.
6530b57cec5SDimitry Andricdef INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
6540b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
6550b57cec5SDimitry Andricdef INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
6560b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
65781ad6265SDimitry Andricdef INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
65881ad6265SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
65906c3fb27SDimitry Andric  [hasPTX<70>, hasSM<80>]>;
66081ad6265SDimitry Andricdef INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
66181ad6265SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
66206c3fb27SDimitry Andric  [hasPTX<70>, hasSM<80>]>;
66381ad6265SDimitry Andricdef INT_NVVM_FMIN_XORSIGN_ABS_F :
66481ad6265SDimitry Andric  F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
66581ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
66606c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
66781ad6265SDimitry Andricdef INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
66881ad6265SDimitry Andric  F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
66981ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
67006c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
67181ad6265SDimitry Andricdef INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
67281ad6265SDimitry Andric  F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
67381ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
67406c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
67581ad6265SDimitry Andricdef INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
67681ad6265SDimitry Andric  F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
67781ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
67806c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
6790b57cec5SDimitry Andric
// f32 max variants, mirroring the f32 min records: plain and .ftz pass no
// predicates; .NaN forms require hasPTX<70>/hasSM<80>; every .xorsign.abs form
// requires hasPTX<72>/hasSM<86>.
6800b57cec5SDimitry Andricdef INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
6810b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
6820b57cec5SDimitry Andricdef INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
6830b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
68481ad6265SDimitry Andricdef INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
68581ad6265SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
68606c3fb27SDimitry Andric  [hasPTX<70>, hasSM<80>]>;
68781ad6265SDimitry Andricdef INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
68881ad6265SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
68906c3fb27SDimitry Andric  [hasPTX<70>, hasSM<80>]>;
69081ad6265SDimitry Andricdef INT_NVVM_FMAX_XORSIGN_ABS_F :
69181ad6265SDimitry Andric  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
69281ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
69306c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
69481ad6265SDimitry Andricdef INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
69581ad6265SDimitry Andric  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
69681ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
69706c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
69881ad6265SDimitry Andricdef INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
69981ad6265SDimitry Andric  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
70081ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
70106c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
70281ad6265SDimitry Andricdef INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
70381ad6265SDimitry Andric  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
70481ad6265SDimitry Andric    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
70506c3fb27SDimitry Andric    [hasPTX<72>, hasSM<86>]>;
7060b57cec5SDimitry Andric
// f64 min/max over Float64Regs. Only the plain forms are defined here, with
// no predicates.
7070b57cec5SDimitry Andricdef INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
7080b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
7090b57cec5SDimitry Andricdef INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
7100b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_fmax_d>;
7110b57cec5SDimitry Andric
71281ad6265SDimitry Andric//
71381ad6265SDimitry Andric// Min Max f16, f16x2, bf16, bf16x2
71481ad6265SDimitry Andric//
71581ad6265SDimitry Andric
// Plain record bundling the per-variant data consumed by the MIN_MAX
// multiclass.
//   V     - variant suffix, stored as Variant.
//   I     - intrinsic for this variant, stored as Intr.
//   RC    - register class for this variant, stored as RegClass.
//   Preds - predicates, stored as Predicates; defaults to
//           [hasPTX<70>, hasSM<80>] when omitted.
71681ad6265SDimitry Andricclass MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
71706c3fb27SDimitry Andric                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
71881ad6265SDimitry Andric  string Variant = V;
71981ad6265SDimitry Andric  Intrinsic Intr = I;
72081ad6265SDimitry Andric  NVPTXRegClass RegClass = RC;
72181ad6265SDimitry Andric  list<Predicate> Predicates = Preds;
72281ad6265SDimitry Andric}
72381ad6265SDimitry Andric
// Generates the f16 / f16x2 / bf16 / bf16x2 min or max instructions.
//   IntName - "min" or "max". Each !if below picks the fmin intrinsic when
//             IntName equals "min" and the fmax intrinsic otherwise; IntName
//             is also the mnemonic prefix of the asm string.
// Every list entry is a MIN_MAX_TUPLE. Scalar f16/bf16 variants use Int16Regs
// and the x2 variants use Int32Regs. Entries that omit the predicate list get
// MIN_MAX_TUPLE's default ([hasPTX<70>, hasSM<80>]); every xorsign_abs entry
// passes [hasPTX<72>, hasSM<86>] explicitly. The bf16 entries have no ftz
// variants in this list.
72481ad6265SDimitry Andricmulticlass MIN_MAX<string IntName> {
72581ad6265SDimitry Andric  foreach P = [
72681ad6265SDimitry Andric    MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
72706c3fb27SDimitry Andric      int_nvvm_fmax_f16), Int16Regs>,
72881ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
72906c3fb27SDimitry Andric      int_nvvm_fmax_ftz_f16), Int16Regs>,
73081ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
73106c3fb27SDimitry Andric      int_nvvm_fmax_nan_f16), Int16Regs>,
73281ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
73306c3fb27SDimitry Andric      int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
73481ad6265SDimitry Andric    MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
73581ad6265SDimitry Andric      int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
73606c3fb27SDimitry Andric      Int16Regs, [hasPTX<72>, hasSM<86>]>,
73781ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
73881ad6265SDimitry Andric      int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
73906c3fb27SDimitry Andric      Int16Regs, [hasPTX<72>, hasSM<86>]>,
74081ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
74181ad6265SDimitry Andric      int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
74206c3fb27SDimitry Andric      Int16Regs, [hasPTX<72>, hasSM<86>]>,
74381ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
74481ad6265SDimitry Andric      int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
74506c3fb27SDimitry Andric      int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
74681ad6265SDimitry Andric    MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
74706c3fb27SDimitry Andric      int_nvvm_fmax_f16x2), Int32Regs>,
74881ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
74906c3fb27SDimitry Andric      int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
75081ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
75106c3fb27SDimitry Andric      int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
75281ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
75306c3fb27SDimitry Andric      int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
75481ad6265SDimitry Andric    MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
75581ad6265SDimitry Andric      int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
75606c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>,
75781ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
75881ad6265SDimitry Andric      int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
75906c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>,
76081ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
76181ad6265SDimitry Andric      int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
76206c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>,
76381ad6265SDimitry Andric    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
76481ad6265SDimitry Andric      int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
76581ad6265SDimitry Andric      int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
76606c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>,
76781ad6265SDimitry Andric    MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
76881ad6265SDimitry Andric      int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
76981ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
77081ad6265SDimitry Andric      int_nvvm_fmax_nan_bf16), Int16Regs>,
77181ad6265SDimitry Andric    MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
77281ad6265SDimitry Andric      int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
77306c3fb27SDimitry Andric      Int16Regs, [hasPTX<72>, hasSM<86>]>,
77481ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
77581ad6265SDimitry Andric      int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
77606c3fb27SDimitry Andric      Int16Regs, [hasPTX<72>, hasSM<86>]>,
77781ad6265SDimitry Andric    MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
77881ad6265SDimitry Andric      int_nvvm_fmax_bf16x2), Int32Regs>,
77981ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
78081ad6265SDimitry Andric      int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
78181ad6265SDimitry Andric    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
78281ad6265SDimitry Andric      int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
78306c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>,
78481ad6265SDimitry Andric    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
78581ad6265SDimitry Andric      int_nvvm_fmin_nan_xorsign_abs_bf16x2,
78681ad6265SDimitry Andric      int_nvvm_fmax_nan_xorsign_abs_bf16x2),
78706c3fb27SDimitry Andric      Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
        // One F_MATH_2 record per tuple, named by the tuple's Variant. The asm
        // mnemonic is IntName followed by Variant with every "_" turned into
        // "." (e.g. "_ftz_f16" becomes ".ftz.f16"); result and both sources
        // share the tuple's register class.
78881ad6265SDimitry Andric        def P.Variant : F_MATH_2<!strconcat(
78981ad6265SDimitry Andric          IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
79081ad6265SDimitry Andric          P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
79181ad6265SDimitry Andric  }
79281ad6265SDimitry Andric}
79381ad6265SDimitry Andric
// Instantiate the half-precision min and max families from MIN_MAX.
// NOTE(review): "INT_NVVM_FMAN" looks like a typo for "INT_NVVM_FMAX". The
// max records are therefore named with an INT_NVVM_FMAN prefix; renaming
// would change those generated record names, so confirm nothing refers to
// them before fixing.
79481ad6265SDimitry Andricdefm INT_NVVM_FMIN : MIN_MAX<"min">;
79581ad6265SDimitry Andricdefm INT_NVVM_FMAN : MIN_MAX<"max">;
7960b57cec5SDimitry Andric
7970b57cec5SDimitry Andric//
7980b57cec5SDimitry Andric// Multiplication
7990b57cec5SDimitry Andric//
8000b57cec5SDimitry Andric
// mul.hi for signed/unsigned 16-, 32- and 64-bit integers. Each record uses
// the same register class for the result and both sources (Int16Regs,
// Int32Regs or Int64Regs) and passes no predicates.
8017a6dacacSDimitry Andricdef INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;", Int16Regs,
8027a6dacacSDimitry Andric  Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
8037a6dacacSDimitry Andricdef INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;", Int16Regs,
8047a6dacacSDimitry Andric  Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
8050b57cec5SDimitry Andricdef INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
8060b57cec5SDimitry Andric  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
8070b57cec5SDimitry Andricdef INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
8080b57cec5SDimitry Andric  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
8090b57cec5SDimitry Andricdef INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
8100b57cec5SDimitry Andric  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
8110b57cec5SDimitry Andricdef INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
8120b57cec5SDimitry Andric  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;
8130b57cec5SDimitry Andric
// f32 multiply with an explicit rounding modifier (.rn/.rz/.rm/.rp), each in
// a .ftz and a non-.ftz form. All operate on Float32Regs, no predicates.
8140b57cec5SDimitry Andricdef INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
8150b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
8160b57cec5SDimitry Andricdef INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
8170b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
8180b57cec5SDimitry Andricdef INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
8190b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
8200b57cec5SDimitry Andricdef INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
8210b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
8220b57cec5SDimitry Andricdef INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
8230b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
8240b57cec5SDimitry Andricdef INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
8250b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
8260b57cec5SDimitry Andricdef INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
8270b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
8280b57cec5SDimitry Andricdef INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
8290b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;
8300b57cec5SDimitry Andric
// f64 multiply with the same four rounding modifiers; no .ftz forms are
// defined for f64 here. All operate on Float64Regs.
8310b57cec5SDimitry Andricdef INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
8320b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
8330b57cec5SDimitry Andricdef INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
8340b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
8350b57cec5SDimitry Andricdef INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
8360b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
8370b57cec5SDimitry Andricdef INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
8380b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;
8390b57cec5SDimitry Andric
// mul24.lo in signed (.s32) and unsigned (.u32) forms. Result and both sources
// are Int32Regs; no predicates.
8400b57cec5SDimitry Andricdef INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
8410b57cec5SDimitry Andric  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
8420b57cec5SDimitry Andricdef INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
8430b57cec5SDimitry Andric  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;
8440b57cec5SDimitry Andric
8450b57cec5SDimitry Andric//
8460b57cec5SDimitry Andric// Div
8470b57cec5SDimitry Andric//
8480b57cec5SDimitry Andric
// f32 div.approx, with and without .ftz. Float32Regs throughout, no
// predicates.
8490b57cec5SDimitry Andricdef INT_NVVM_DIV_APPROX_FTZ_F
8500b57cec5SDimitry Andric  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
8510b57cec5SDimitry Andric    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
8520b57cec5SDimitry Andricdef INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
8530b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;
8540b57cec5SDimitry Andric
// f32 div with an explicit rounding modifier (.rn/.rz/.rm/.rp), each in a
// .ftz and a non-.ftz form.
8550b57cec5SDimitry Andricdef INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
8560b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
8570b57cec5SDimitry Andricdef INT_NVVM_DIV_RN_F     : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
8580b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
8590b57cec5SDimitry Andricdef INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
8600b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
8610b57cec5SDimitry Andricdef INT_NVVM_DIV_RZ_F     : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
8620b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
8630b57cec5SDimitry Andricdef INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
8640b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
8650b57cec5SDimitry Andricdef INT_NVVM_DIV_RM_F     : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
8660b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
8670b57cec5SDimitry Andricdef INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
8680b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
8690b57cec5SDimitry Andricdef INT_NVVM_DIV_RP_F     : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
8700b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;
8710b57cec5SDimitry Andric
// f64 div with the same four rounding modifiers; no .ftz or .approx forms are
// defined for f64 here. Float64Regs throughout.
8720b57cec5SDimitry Andricdef INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
8730b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
8740b57cec5SDimitry Andricdef INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
8750b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
8760b57cec5SDimitry Andricdef INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
8770b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
8780b57cec5SDimitry Andricdef INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
8790b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;
8800b57cec5SDimitry Andric
8810b57cec5SDimitry Andric//
8820b57cec5SDimitry Andric// Sad
8830b57cec5SDimitry Andric//
8840b57cec5SDimitry Andric
// Three-source `sad` records for signed and unsigned 16-, 32- and 64-bit
// operands. The register class of the destination and of all three sources
// matches the width in the asm suffix (Int16Regs / Int32Regs / Int64Regs).
// NOTE(review): F_MATH_3 is defined outside this chunk; presumably its
// arguments are (asm string, dst class, three source classes, intrinsic).
8857a6dacacSDimitry Andricdef INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
8867a6dacacSDimitry Andric  Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
8877a6dacacSDimitry Andricdef INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
8887a6dacacSDimitry Andric  Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
8890b57cec5SDimitry Andricdef INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
8900b57cec5SDimitry Andric  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
8910b57cec5SDimitry Andricdef INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
8920b57cec5SDimitry Andric  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
8937a6dacacSDimitry Andricdef INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
8947a6dacacSDimitry Andric  Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
8957a6dacacSDimitry Andricdef INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
8967a6dacacSDimitry Andric  Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;
8970b57cec5SDimitry Andric
8980b57cec5SDimitry Andric//
8990b57cec5SDimitry Andric// Floor  Ceil
9000b57cec5SDimitry Andric//
9010b57cec5SDimitry Andric
// floor/ceil have no instruction records of their own: each intrinsic is
// rewritten to a same-type CVT (CVT_f32_f32 or CVT_f64_f64) with a mode
// operand. floor uses CvtRMI and ceil uses CvtRPI; the *_ftz_f intrinsics use
// the corresponding *_FTZ mode.
// NOTE(review): the CVT_* instructions and Cvt* mode operands are defined
// outside this chunk.
9020b57cec5SDimitry Andricdef : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
9030b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
9040b57cec5SDimitry Andricdef : Pat<(int_nvvm_floor_f Float32Regs:$a),
9050b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
9060b57cec5SDimitry Andricdef : Pat<(int_nvvm_floor_d Float64Regs:$a),
9070b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
9080b57cec5SDimitry Andric
9090b57cec5SDimitry Andricdef : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
9100b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
9110b57cec5SDimitry Andricdef : Pat<(int_nvvm_ceil_f Float32Regs:$a),
9120b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
9130b57cec5SDimitry Andricdef : Pat<(int_nvvm_ceil_d Float64Regs:$a),
9140b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
9150b57cec5SDimitry Andric
9160b57cec5SDimitry Andric//
9170b57cec5SDimitry Andric// Abs
9180b57cec5SDimitry Andric//
9190b57cec5SDimitry Andric
// Floating-point absolute value: `abs.ftz.f32` and `abs.f32` on Float32Regs,
// and `abs.f64` on Float64Regs, each tied to its int_nvvm_fabs_* intrinsic.
// NOTE(review): F_MATH_1 is defined outside this chunk; presumably its
// arguments are (asm string, dst class, src class, intrinsic[, predicates]).
9200b57cec5SDimitry Andricdef INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
9210b57cec5SDimitry Andric  Float32Regs, int_nvvm_fabs_ftz_f>;
9220b57cec5SDimitry Andricdef INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
9230b57cec5SDimitry Andric  Float32Regs, int_nvvm_fabs_f>;
9240b57cec5SDimitry Andric
9250b57cec5SDimitry Andricdef INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
9260b57cec5SDimitry Andric  Float64Regs, int_nvvm_fabs_d>;
9270b57cec5SDimitry Andric
9280b57cec5SDimitry Andric//
92981ad6265SDimitry Andric// Abs, Neg bf16, bf16x2
93081ad6265SDimitry Andric//
93181ad6265SDimitry Andric
// abs/neg for bf16 and packed bf16x2. Scalar bf16 operands are declared with
// Int16Regs and bf16x2 operands with Int32Regs. Every record carries the
// predicate list [hasPTX<70>, hasSM<80>].
93281ad6265SDimitry Andricdef INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
93306c3fb27SDimitry Andric  Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
93481ad6265SDimitry Andricdef INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
93506c3fb27SDimitry Andric  Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
93681ad6265SDimitry Andricdef INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
93706c3fb27SDimitry Andric  Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
93881ad6265SDimitry Andricdef INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
93906c3fb27SDimitry Andric  Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
94081ad6265SDimitry Andric
94181ad6265SDimitry Andric//
9420b57cec5SDimitry Andric// Round
9430b57cec5SDimitry Andric//
9440b57cec5SDimitry Andric
// round is selected as a same-type CVT with mode CvtRNI (CvtRNI_FTZ for the
// ftz f32 intrinsic); no dedicated instruction record is declared.
9450b57cec5SDimitry Andricdef : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
9460b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
9470b57cec5SDimitry Andricdef : Pat<(int_nvvm_round_f Float32Regs:$a),
9480b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
9490b57cec5SDimitry Andricdef : Pat<(int_nvvm_round_d Float64Regs:$a),
9500b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
9510b57cec5SDimitry Andric
9520b57cec5SDimitry Andric//
9530b57cec5SDimitry Andric// Trunc
9540b57cec5SDimitry Andric//
9550b57cec5SDimitry Andric
// trunc is selected as a same-type CVT with mode CvtRZI (CvtRZI_FTZ for the
// ftz f32 intrinsic).
9560b57cec5SDimitry Andricdef : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
9570b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
9580b57cec5SDimitry Andricdef : Pat<(int_nvvm_trunc_f Float32Regs:$a),
9590b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
9600b57cec5SDimitry Andricdef : Pat<(int_nvvm_trunc_d Float64Regs:$a),
9610b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
9620b57cec5SDimitry Andric
9630b57cec5SDimitry Andric//
9640b57cec5SDimitry Andric// Saturate
9650b57cec5SDimitry Andric//
9660b57cec5SDimitry Andric
// saturate is selected as a same-type CVT with mode CvtSAT (CvtSAT_FTZ for
// the ftz f32 intrinsic).
9670b57cec5SDimitry Andricdef : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
9680b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
9690b57cec5SDimitry Andricdef : Pat<(int_nvvm_saturate_f Float32Regs:$a),
9700b57cec5SDimitry Andric          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
9710b57cec5SDimitry Andricdef : Pat<(int_nvvm_saturate_d Float64Regs:$a),
9720b57cec5SDimitry Andric          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
9730b57cec5SDimitry Andric
9740b57cec5SDimitry Andric//
9750b57cec5SDimitry Andric// Exp2  Log2
9760b57cec5SDimitry Andric//
9770b57cec5SDimitry Andric
// `ex2.approx` records for f32 (with and without .ftz), f64, f16 and f16x2.
// The f16 form uses Int16Regs and the f16x2 form uses Int32Regs; both are
// additionally gated by [hasPTX<70>, hasSM<75>]. The f32/f64 records pass no
// predicate list.
9780b57cec5SDimitry Andricdef INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
9790b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
9800b57cec5SDimitry Andricdef INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
9810b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
9820b57cec5SDimitry Andricdef INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
9830b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
98481ad6265SDimitry Andricdef INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
98506c3fb27SDimitry Andric  Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
98681ad6265SDimitry Andricdef INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
98706c3fb27SDimitry Andric  Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
9880b57cec5SDimitry Andric
// `lg2.approx` records for f32 (with and without .ftz) and f64. Unlike the
// ex2 family above, no f16/f16x2 variants are declared here.
9890b57cec5SDimitry Andricdef INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
9900b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
9910b57cec5SDimitry Andricdef INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
9920b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
9930b57cec5SDimitry Andricdef INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
9940b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;
9950b57cec5SDimitry Andric
9960b57cec5SDimitry Andric//
9970b57cec5SDimitry Andric// Sin  Cos
9980b57cec5SDimitry Andric//
9990b57cec5SDimitry Andric
// `sin.approx` and `cos.approx` records. Only f32 forms are declared, each
// with and without the `.ftz` modifier.
10000b57cec5SDimitry Andricdef INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
10010b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
10020b57cec5SDimitry Andricdef INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
10030b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;
10040b57cec5SDimitry Andric
10050b57cec5SDimitry Andricdef INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
10060b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
10070b57cec5SDimitry Andricdef INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
10080b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;
10090b57cec5SDimitry Andric
10100b57cec5SDimitry Andric//
10110b57cec5SDimitry Andric// Fma
10120b57cec5SDimitry Andric//
10130b57cec5SDimitry Andric
// Plain data record describing one fma variant. It only stores its template
// arguments in named fields so that a `foreach` can read them back:
//   V     -> Variant    (suffix string such as "_rn_f64")
//   I     -> Intr       (the intrinsic to match)
//   RC    -> RegClass   (register class used for every operand)
//   Preds -> Predicates (optional predicate list; defaults to empty)
101481ad6265SDimitry Andricclass FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
101581ad6265SDimitry Andric                list<Predicate> Preds = []> {
101681ad6265SDimitry Andric  string Variant = V;
101781ad6265SDimitry Andric  Intrinsic Intr = I;
101881ad6265SDimitry Andric  NVPTXRegClass RegClass = RC;
101981ad6265SDimitry Andric  list<Predicate> Predicates = Preds;
102081ad6265SDimitry Andric}
10210b57cec5SDimitry Andric
// Emits one F_MATH_3 record per FMA_TUPLE in the list below.
// For each tuple P:
//   * the def is named by P.Variant;
//   * the asm string is "fma" + P.Variant with every "_" replaced by "." +
//     the operand list, e.g. "_rn_ftz_f32" yields
//     "fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;";
//   * P.RegClass is used for the destination and all three sources;
//   * P.Predicates is forwarded unchanged (empty for the f64/f32 entries).
102281ad6265SDimitry Andricmulticlass FMA_INST {
102381ad6265SDimitry Andric  foreach P = [
    // f64 variants: no predicates.
102481ad6265SDimitry Andric    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
102581ad6265SDimitry Andric    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
102681ad6265SDimitry Andric    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
102781ad6265SDimitry Andric    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,
102881ad6265SDimitry Andric
    // f32 variants, with and without ftz: no predicates.
102981ad6265SDimitry Andric    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
103081ad6265SDimitry Andric    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
103181ad6265SDimitry Andric    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
103281ad6265SDimitry Andric    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
103381ad6265SDimitry Andric    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
103481ad6265SDimitry Andric    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
103581ad6265SDimitry Andric    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
103681ad6265SDimitry Andric    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
103781ad6265SDimitry Andric
    // Scalar f16 variants in Int16Regs: [hasPTX<42>, hasSM<53>], except the
    // relu forms which use [hasPTX<70>, hasSM<80>].
103806c3fb27SDimitry Andric    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
103906c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
104006c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
104106c3fb27SDimitry Andric    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
104206c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
104306c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
104406c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
104506c3fb27SDimitry Andric    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
104606c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
104706c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
104806c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
104981ad6265SDimitry Andric
    // Scalar bf16 variants in Int16Regs: all [hasPTX<70>, hasSM<80>].
105006c3fb27SDimitry Andric    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
105106c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
105206c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
105306c3fb27SDimitry Andric    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
105406c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
105506c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
105606c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
105781ad6265SDimitry Andric    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
105806c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
105906c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
106006c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
106181ad6265SDimitry Andric
    // Packed f16x2 / bf16x2 variants in Int32Regs.
106206c3fb27SDimitry Andric    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
106306c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
106406c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
106506c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
106606c3fb27SDimitry Andric    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
106706c3fb27SDimitry Andric      [hasPTX<42>, hasSM<53>]>,
106806c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
106906c3fb27SDimitry Andric      Int32Regs, [hasPTX<42>, hasSM<53>]>,
107006c3fb27SDimitry Andric    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
107106c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
107206c3fb27SDimitry Andric    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
107306c3fb27SDimitry Andric      Int32Regs, [hasPTX<70>, hasSM<80>]>,
107481ad6265SDimitry Andric    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
107506c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>,
107681ad6265SDimitry Andric    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
107706c3fb27SDimitry Andric      [hasPTX<70>, hasSM<80>]>
107881ad6265SDimitry Andric  ] in {
107981ad6265SDimitry Andric    def P.Variant :
108081ad6265SDimitry Andric      F_MATH_3<!strconcat("fma",
108181ad6265SDimitry Andric        !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"),
108281ad6265SDimitry Andric        P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
108381ad6265SDimitry Andric  }
108481ad6265SDimitry Andric}
108581ad6265SDimitry Andric
// Instantiates FMA_INST. Each def inside the multiclass is named by the
// tuple's Variant string, so the generated record names are INT_NVVM_FMA
// followed by that string (e.g. INT_NVVM_FMA_rn_f64).
108681ad6265SDimitry Andricdefm INT_NVVM_FMA : FMA_INST;
10870b57cec5SDimitry Andric
10880b57cec5SDimitry Andric//
10890b57cec5SDimitry Andric// Rcp
10900b57cec5SDimitry Andric//
10910b57cec5SDimitry Andric
// Single-precision reciprocal records: the .rn/.rz/.rm/.rp forms of `rcp`,
// each with and without `.ftz`, on Float32Regs.
10920b57cec5SDimitry Andricdef INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
10930b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
10940b57cec5SDimitry Andricdef INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
10950b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
10960b57cec5SDimitry Andricdef INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
10970b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
10980b57cec5SDimitry Andricdef INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
10990b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
11000b57cec5SDimitry Andricdef INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
11010b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
11020b57cec5SDimitry Andricdef INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
11030b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
11040b57cec5SDimitry Andricdef INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
11050b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
11060b57cec5SDimitry Andricdef INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
11070b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;
11080b57cec5SDimitry Andric
// Double-precision reciprocal records: one per .rn/.rz/.rm/.rp suffix on
// Float64Regs.
11090b57cec5SDimitry Andricdef INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs,
11100b57cec5SDimitry Andric  Float64Regs, int_nvvm_rcp_rn_d>;
11110b57cec5SDimitry Andricdef INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs,
11120b57cec5SDimitry Andric  Float64Regs, int_nvvm_rcp_rz_d>;
11130b57cec5SDimitry Andricdef INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs,
11140b57cec5SDimitry Andric  Float64Regs, int_nvvm_rcp_rm_d>;
11150b57cec5SDimitry Andricdef INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs,
11160b57cec5SDimitry Andric  Float64Regs, int_nvvm_rcp_rp_d>;
11170b57cec5SDimitry Andric
// Approximate reciprocal with `.ftz`, for f32 and f64. Only the ftz forms of
// rcp.approx are declared in this section.
111881ad6265SDimitry Andricdef INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
111981ad6265SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
11200b57cec5SDimitry Andricdef INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
11210b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;
11220b57cec5SDimitry Andric
11230b57cec5SDimitry Andric//
11240b57cec5SDimitry Andric// Sqrt
11250b57cec5SDimitry Andric//
11260b57cec5SDimitry Andric
// Single-precision square-root records: the .rn/.rz/.rm/.rp forms and the
// approx form, each with and without `.ftz`, on Float32Regs. Several of these
// records are referenced by name from selection patterns elsewhere in this
// file, so the def names must stay stable.
11270b57cec5SDimitry Andricdef INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
11280b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
11290b57cec5SDimitry Andricdef INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs,
11300b57cec5SDimitry Andric  Float32Regs, int_nvvm_sqrt_rn_f>;
11310b57cec5SDimitry Andricdef INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
11320b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
11330b57cec5SDimitry Andricdef INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs,
11340b57cec5SDimitry Andric  Float32Regs, int_nvvm_sqrt_rz_f>;
11350b57cec5SDimitry Andricdef INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
11360b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
11370b57cec5SDimitry Andricdef INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs,
11380b57cec5SDimitry Andric  Float32Regs, int_nvvm_sqrt_rm_f>;
11390b57cec5SDimitry Andricdef INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
11400b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
11410b57cec5SDimitry Andricdef INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
11420b57cec5SDimitry Andric  Float32Regs, int_nvvm_sqrt_rp_f>;
11430b57cec5SDimitry Andricdef INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
11440b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
11450b57cec5SDimitry Andricdef INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
11460b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;
11470b57cec5SDimitry Andric
// Double-precision square-root records: one per .rn/.rz/.rm/.rp suffix on
// Float64Regs. No approx or ftz f64 forms are declared here.
11480b57cec5SDimitry Andricdef INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
11490b57cec5SDimitry Andric  Float64Regs, int_nvvm_sqrt_rn_d>;
11500b57cec5SDimitry Andricdef INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
11510b57cec5SDimitry Andric  Float64Regs, int_nvvm_sqrt_rz_d>;
11520b57cec5SDimitry Andricdef INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
11530b57cec5SDimitry Andric  Float64Regs, int_nvvm_sqrt_rm_d>;
11540b57cec5SDimitry Andricdef INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
11550b57cec5SDimitry Andric  Float64Regs, int_nvvm_sqrt_rp_d>;
11560b57cec5SDimitry Andric
11570b57cec5SDimitry Andric// nvvm_sqrt intrinsic
// int_nvvm_sqrt_f has no instruction of its own; four alternatives map it to
// an existing sqrt record, distinguished only by their Requires lists:
//   [doF32FTZ, do_SQRTF32_RN] -> INT_NVVM_SQRT_RN_FTZ_F
//   [do_SQRTF32_RN]           -> INT_NVVM_SQRT_RN_F
//   [doF32FTZ]                -> INT_NVVM_SQRT_APPROX_FTZ_F
//   (no predicate)            -> INT_NVVM_SQRT_APPROX_F
// NOTE(review): the predicates are not mutually exclusive as written, so this
// presumably relies on the selector preferring the earlier / more constrained
// pattern — confirm before reordering.
11580b57cec5SDimitry Andricdef : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
11590b57cec5SDimitry Andric          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
11600b57cec5SDimitry Andricdef : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
11610b57cec5SDimitry Andric          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
11620b57cec5SDimitry Andricdef : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
11630b57cec5SDimitry Andric          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
11640b57cec5SDimitry Andricdef : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
11650b57cec5SDimitry Andric          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;
11660b57cec5SDimitry Andric
11670b57cec5SDimitry Andric//
11680b57cec5SDimitry Andric// Rsqrt
11690b57cec5SDimitry Andric//
11700b57cec5SDimitry Andric
// `rsqrt.approx` records: f32 and f64, each with and without `.ftz`. Only
// approx forms are declared. The two f32 records are referenced by name from
// the reciprocal-sqrt folding patterns that follow in this file.
11710b57cec5SDimitry Andricdef INT_NVVM_RSQRT_APPROX_FTZ_F
11720b57cec5SDimitry Andric  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
11730b57cec5SDimitry Andric    int_nvvm_rsqrt_approx_ftz_f>;
1174*0fca6ea1SDimitry Andricdef INT_NVVM_RSQRT_APPROX_FTZ_D
1175*0fca6ea1SDimitry Andric  : F_MATH_1<"rsqrt.approx.ftz.f64 \t$dst, $src0;", Float64Regs, Float64Regs,
1176*0fca6ea1SDimitry Andric    int_nvvm_rsqrt_approx_ftz_d>;
1177*0fca6ea1SDimitry Andric
11780b57cec5SDimitry Andricdef INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
11790b57cec5SDimitry Andric  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
11800b57cec5SDimitry Andricdef INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
11810b57cec5SDimitry Andric  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;
11820b57cec5SDimitry Andric
1183*0fca6ea1SDimitry Andric// 1.0f / sqrt_approx -> rsqrt_approx
// Folds `FloatConst1 / sqrt(a)` (f32 only) into a single rsqrt.approx record.
// Every alternative requires doRsqrtOpt.
//   * Explicit approx intrinsics (int_nvvm_sqrt_approx_f / _ftz_f) map
//     directly to the matching non-ftz / ftz rsqrt record.
//   * int_nvvm_sqrt_f and the generic fsqrt node are folded only when
//     do_SQRTF32_APPROX also holds; doNoF32FTZ vs. doF32FTZ then picks the
//     non-ftz vs. ftz rsqrt record.
// NOTE(review): FloatConst1 and the do* predicates are defined outside this
// chunk; FloatConst1 is presumably an f32 immediate equal to 1.0 — confirm.
1184*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f Float32Regs:$a)),
1185*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1186*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt]>;
1187*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f Float32Regs:$a)),
1188*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1189*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt]>;
1190*0fca6ea1SDimitry Andric// same for int_nvvm_sqrt_f when non-precision sqrt is requested
1191*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
1192*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1193*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1194*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$a)),
1195*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1196*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
1197*0fca6ea1SDimitry Andric
1198*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
1199*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_F Float32Regs:$a)>,
1200*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
1201*0fca6ea1SDimitry Andricdef: Pat<(fdiv FloatConst1, (fsqrt Float32Regs:$a)),
1202*0fca6ea1SDimitry Andric         (INT_NVVM_RSQRT_APPROX_FTZ_F Float32Regs:$a)>,
1203*0fca6ea1SDimitry Andric         Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
12040b57cec5SDimitry Andric//
12050b57cec5SDimitry Andric// Add
12060b57cec5SDimitry Andric//
12070b57cec5SDimitry Andric
// Single-precision add records with an explicit rounding suffix: the
// .rn/.rz/.rm/.rp forms, each with and without `.ftz`, on Float32Regs.
12080b57cec5SDimitry Andricdef INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
12090b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
12100b57cec5SDimitry Andricdef INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
12110b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
12120b57cec5SDimitry Andricdef INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
12130b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
12140b57cec5SDimitry Andricdef INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
12150b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
12160b57cec5SDimitry Andricdef INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
12170b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
12180b57cec5SDimitry Andricdef INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
12190b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
12200b57cec5SDimitry Andricdef INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
12210b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
12220b57cec5SDimitry Andricdef INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
12230b57cec5SDimitry Andric  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;
12240b57cec5SDimitry Andric
// Double-precision add records: one per .rn/.rz/.rm/.rp suffix on
// Float64Regs.
12250b57cec5SDimitry Andricdef INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
12260b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
12270b57cec5SDimitry Andricdef INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
12280b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
12290b57cec5SDimitry Andricdef INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
12300b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
12310b57cec5SDimitry Andricdef INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
12320b57cec5SDimitry Andric  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;
12330b57cec5SDimitry Andric
12340b57cec5SDimitry Andric//
12350b57cec5SDimitry Andric// Convert
12360b57cec5SDimitry Andric//
12370b57cec5SDimitry Andric
// f64 -> f32 conversions. Each int_nvvm_d2f_* intrinsic is selected as
// CVT_f32_f64 with the mode operand matching its suffix: CvtRN/RZ/RM/RP, or
// the corresponding *_FTZ mode for the _ftz intrinsics.
12380b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
12390b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
12400b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
12410b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
12420b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
12430b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
12440b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
12450b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
12460b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
12470b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
12480b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
12490b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
12500b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
12510b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
12520b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
12530b57cec5SDimitry Andric          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;
12540b57cec5SDimitry Andric
// f64 -> 32-bit integer conversions. d2i selects CVT_s32_f64 and d2ui selects
// CVT_u32_f64. Float-to-integer patterns use the integer-rounding mode
// operands CvtRNI/RZI/RMI/RPI rather than CvtRN/RZ/RM/RP.
12550b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
12560b57cec5SDimitry Andric          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
12570b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
12580b57cec5SDimitry Andric          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
12590b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
12600b57cec5SDimitry Andric          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
12610b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
12620b57cec5SDimitry Andric          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;
12630b57cec5SDimitry Andric
12640b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
12650b57cec5SDimitry Andric          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
12660b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
12670b57cec5SDimitry Andric          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
12680b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
12690b57cec5SDimitry Andric          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
12700b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
12710b57cec5SDimitry Andric          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;
12720b57cec5SDimitry Andric
// 32-bit integer -> f64 conversions. i2d selects CVT_f64_s32 and ui2d selects
// CVT_f64_u32, with mode operand CvtRN/RZ/RM/RP taken from the intrinsic
// suffix.
12730b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
12740b57cec5SDimitry Andric          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
12750b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
12760b57cec5SDimitry Andric          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
12770b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
12780b57cec5SDimitry Andric          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
12790b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
12800b57cec5SDimitry Andric          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;
12810b57cec5SDimitry Andric
12820b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
12830b57cec5SDimitry Andric          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
12840b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
12850b57cec5SDimitry Andric          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
12860b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
12870b57cec5SDimitry Andric          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
12880b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
12890b57cec5SDimitry Andric          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;
12900b57cec5SDimitry Andric
// f32 -> 32-bit integer conversions. f2i selects CVT_s32_f32 and f2ui selects
// CVT_u32_f32. The mode operand is CvtRNI/RZI/RMI/RPI, or the matching *_FTZ
// mode for the _ftz intrinsics.
12910b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
12920b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
12930b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
12940b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
12950b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
12960b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
12970b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
12980b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
12990b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
13000b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
13010b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
13020b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
13030b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
13040b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
13050b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
13060b57cec5SDimitry Andric          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
13090b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
13100b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
13110b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
13120b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
13130b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
13140b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
13150b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
13160b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
13170b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
13180b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
13190b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
13200b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
13210b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
13220b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
13230b57cec5SDimitry Andric          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;
13240b57cec5SDimitry Andric
// 32-bit integer -> f32 conversions. i2f selects CVT_f32_s32 and ui2f selects
// CVT_f32_u32, with mode operand CvtRN/RZ/RM/RP taken from the intrinsic
// suffix.
13250b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
13260b57cec5SDimitry Andric          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
13270b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
13280b57cec5SDimitry Andric          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
13290b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
13300b57cec5SDimitry Andric          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
13310b57cec5SDimitry Andricdef : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
13320b57cec5SDimitry Andric          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;
13330b57cec5SDimitry Andric
13340b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
13350b57cec5SDimitry Andric          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
13360b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
13370b57cec5SDimitry Andric          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
13380b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
13390b57cec5SDimitry Andric          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
13400b57cec5SDimitry Andricdef : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
13410b57cec5SDimitry Andric          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;
13420b57cec5SDimitry Andric
// Selects the int_nvvm_ff2bf16x2_* intrinsics (two Float32Regs operands,
// $a and $b) to CVT_bf16x2_f32. The intrinsic suffix chooses the mode operand:
// rn -> CvtRN, rn_relu -> CvtRN_RELU, rz -> CvtRZ, rz_relu -> CvtRZ_RELU.
134304eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
134404eeddc0SDimitry Andric          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
134504eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
134604eeddc0SDimitry Andric          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
134704eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
134804eeddc0SDimitry Andric          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
134904eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
135004eeddc0SDimitry Andric          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
135104eeddc0SDimitry Andric
// Selects the int_nvvm_ff2f16x2_* intrinsics (two Float32Regs operands,
// $a and $b) to CVT_f16x2_f32, with the same suffix-to-mode mapping
// (CvtRN, CvtRN_RELU, CvtRZ, CvtRZ_RELU).
135204eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
135304eeddc0SDimitry Andric          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
135404eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
135504eeddc0SDimitry Andric          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
135604eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
135704eeddc0SDimitry Andric          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
135804eeddc0SDimitry Andricdef : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
135904eeddc0SDimitry Andric          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;
136004eeddc0SDimitry Andric
// Selects the int_nvvm_f2bf16_* intrinsics (one Float32Regs operand) to
// CVT_bf16_f32 with CvtRN, CvtRN_RELU, CvtRZ or CvtRZ_RELU, following the
// intrinsic suffix.
136104eeddc0SDimitry Andricdef : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
136204eeddc0SDimitry Andric          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
136304eeddc0SDimitry Andricdef : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
136404eeddc0SDimitry Andric          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
136504eeddc0SDimitry Andricdef : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
136604eeddc0SDimitry Andric          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
136704eeddc0SDimitry Andricdef : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
136804eeddc0SDimitry Andric          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;
136904eeddc0SDimitry Andric
// Instruction selected for the int_nvvm_f2tf32_rna intrinsic: emits
// `cvt.rna.tf32.f32`, taking a Float32Regs source and producing its result in
// an Int32Regs register.
//
// The .tf32 destination format of `cvt` was introduced in PTX ISA 7.0 and
// needs sm_80 or newer, so the instruction is gated on those features (the
// same way INT_FNS_MBO gates `fns`). Without the predicates the backend would
// emit PTX that ptxas rejects for older targets.
def CVT_tf32_f32 :
   NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
                   "cvt.rna.tf32.f32 \t$dest, $a;",
       [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>,
   Requires<[hasPTX<70>, hasSM<80>]>;
137404eeddc0SDimitry Andric
// int_nvvm_lohi_i2d: builds a 64-bit value from two 32-bit integer registers
// with a single `mov.b64 $dst, {$src0, $src1}` (the doubled braces are the
// escaped form of literal braces in the asm string). Register classes are
// passed to the F_MATH_2 helper, which is defined outside this chunk.
13750b57cec5SDimitry Andricdef INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
13760b57cec5SDimitry Andric  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;
13770b57cec5SDimitry Andric
// int_nvvm_d2i_lo: unpacks the 64-bit source with `mov.b64 {a, b}, $src0`
// inside a scoped asm block. $dst is the first element of the pair; the second
// goes to a scratch `.reg .b32 %temp` declared in the same block and is
// discarded.
13780b57cec5SDimitry Andricdef INT_NVVM_D2I_LO : F_MATH_1<
13790b57cec5SDimitry Andric  !strconcat("{{\n\t",
13800b57cec5SDimitry Andric             ".reg .b32 %temp; \n\t",
13810b57cec5SDimitry Andric             "mov.b64 \t{$dst, %temp}, $src0;\n\t",
13820b57cec5SDimitry Andric             "}}"),
13830b57cec5SDimitry Andric  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// int_nvvm_d2i_hi: same unpacking, but $dst is the second element of the pair
// and the first one lands in the scratch %temp register.
13840b57cec5SDimitry Andricdef INT_NVVM_D2I_HI : F_MATH_1<
13850b57cec5SDimitry Andric  !strconcat("{{\n\t",
13860b57cec5SDimitry Andric             ".reg .b32 %temp; \n\t",
13870b57cec5SDimitry Andric             "mov.b64 \t{%temp, $dst}, $src0;\n\t",
13880b57cec5SDimitry Andric             "}}"),
13890b57cec5SDimitry Andric  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;
13900b57cec5SDimitry Andric
// Selects the int_nvvm_f2ll_* intrinsics (one Float32Regs operand) to
// CVT_s64_f32. The suffix rn/rz/rm/rp picks CvtRNI/CvtRZI/CvtRMI/CvtRPI and
// the *_ftz intrinsics use the corresponding *_FTZ mode.
13910b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
13920b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
13930b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
13940b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
13950b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
13960b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
13970b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
13980b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
13990b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
14000b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
14010b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
14020b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
14030b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
14040b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
14050b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
14060b57cec5SDimitry Andric          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;
14070b57cec5SDimitry Andric
// Same mapping for the int_nvvm_f2ull_* intrinsics, which select CVT_u64_f32.
14080b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
14090b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
14100b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
14110b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
14120b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
14130b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
14140b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
14150b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
14160b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
14170b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
14180b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
14190b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
14200b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
14210b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
14220b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
14230b57cec5SDimitry Andric          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;
14240b57cec5SDimitry Andric
// Selects the int_nvvm_d2ll_* intrinsics (one Float64Regs operand) to
// CVT_s64_f64 with CvtRNI/CvtRZI/CvtRMI/CvtRPI chosen by the intrinsic suffix.
// No *_ftz variants are defined for the f64 source here.
14250b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
14260b57cec5SDimitry Andric          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
14270b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
14280b57cec5SDimitry Andric          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
14290b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
14300b57cec5SDimitry Andric          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
14310b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
14320b57cec5SDimitry Andric          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;
14330b57cec5SDimitry Andric
// Same mapping for the int_nvvm_d2ull_* intrinsics, which select CVT_u64_f64.
14340b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
14350b57cec5SDimitry Andric          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
14360b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
14370b57cec5SDimitry Andric          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
14380b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
14390b57cec5SDimitry Andric          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
14400b57cec5SDimitry Andricdef : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
14410b57cec5SDimitry Andric          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;
14420b57cec5SDimitry Andric
// Selects the int_nvvm_ll2f_* intrinsics (one Int64Regs operand) to
// CVT_f32_s64 with CvtRN/CvtRZ/CvtRM/CvtRP chosen by the intrinsic suffix.
14430b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
14440b57cec5SDimitry Andric          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
14450b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
14460b57cec5SDimitry Andric          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
14470b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
14480b57cec5SDimitry Andric          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
14490b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
14500b57cec5SDimitry Andric          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;
14510b57cec5SDimitry Andric
// Same mapping for the int_nvvm_ull2f_* intrinsics, which select CVT_f32_u64.
14520b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
14530b57cec5SDimitry Andric          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
14540b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
14550b57cec5SDimitry Andric          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
14560b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
14570b57cec5SDimitry Andric          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
14580b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
14590b57cec5SDimitry Andric          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;
14600b57cec5SDimitry Andric
// Selects the int_nvvm_ll2d_* intrinsics (one Int64Regs operand) to
// CVT_f64_s64 with CvtRN/CvtRZ/CvtRM/CvtRP chosen by the intrinsic suffix.
14610b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
14620b57cec5SDimitry Andric          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
14630b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
14640b57cec5SDimitry Andric          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
14650b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
14660b57cec5SDimitry Andric          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
14670b57cec5SDimitry Andricdef : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
14680b57cec5SDimitry Andric          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;
14690b57cec5SDimitry Andric
// Same mapping for the int_nvvm_ull2d_* intrinsics, which select CVT_f64_u64.
14700b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
14710b57cec5SDimitry Andric          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
14720b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
14730b57cec5SDimitry Andric          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
14740b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
14750b57cec5SDimitry Andric          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
14760b57cec5SDimitry Andricdef : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
14770b57cec5SDimitry Andric          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;
14780b57cec5SDimitry Andric
14790b57cec5SDimitry Andric
// Selects int_nvvm_f2h_rn_ftz and int_nvvm_f2h_rn (one Float32Regs operand) to
// CVT_f16_f32, using CvtRN_FTZ for the _ftz intrinsic and CvtRN otherwise.
14800b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
148106c3fb27SDimitry Andric          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
14820b57cec5SDimitry Andricdef : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
148306c3fb27SDimitry Andric          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
14840b57cec5SDimitry Andric
14850b57cec5SDimitry Andric//
14860b57cec5SDimitry Andric// Bitcast
14870b57cec5SDimitry Andric//
14880b57cec5SDimitry Andric
// 32-bit bitcast intrinsics (int_nvvm_bitcast_f2i / _i2f): both emit a plain
// `mov.b32 $dst, $src0` between an Int32Regs and a Float32Regs register.
// NOTE(review): F_MATH_1 is defined outside this chunk; the register classes
// appear to be listed as (result, source) -- confirm against its definition.
14890b57cec5SDimitry Andricdef INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
14900b57cec5SDimitry Andric  Float32Regs, int_nvvm_bitcast_f2i>;
14910b57cec5SDimitry Andricdef INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
14920b57cec5SDimitry Andric  Int32Regs, int_nvvm_bitcast_i2f>;
14930b57cec5SDimitry Andric
// 64-bit counterparts (int_nvvm_bitcast_ll2d / _d2ll): `mov.b64` between an
// Int64Regs and a Float64Regs register.
14940b57cec5SDimitry Andricdef INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
14950b57cec5SDimitry Andric  Int64Regs, int_nvvm_bitcast_ll2d>;
14960b57cec5SDimitry Andricdef INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
14970b57cec5SDimitry Andric  Float64Regs, int_nvvm_bitcast_d2ll>;
14980b57cec5SDimitry Andric
14990b57cec5SDimitry Andric//
15000b57cec5SDimitry Andric// FNS
15010b57cec5SDimitry Andric//
15020b57cec5SDimitry Andric
// Common base for every `fns.b32` instruction form.
//   ins      : input operand list; it must name the operands $mask, $base and
//              $offset, which the asm string references.
//   Operands : DAG expression whose value is set into the Int32Regs result
//              $dst by the selection pattern.
// All forms are gated on the hasPTX<60> and hasSM<30> predicates.
15030b57cec5SDimitry Andricclass INT_FNS_MBO<dag ins, dag Operands>
15040b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$dst), ins,
15050b57cec5SDimitry Andric               "fns.b32 \t$dst, $mask, $base, $offset;",
15060b57cec5SDimitry Andric               [(set Int32Regs:$dst, Operands )]>,
150706c3fb27SDimitry Andric    Requires<[hasPTX<60>, hasSM<30>]>;
15080b57cec5SDimitry Andric
// The eight register/immediate combinations of int_nvvm_fns. The three-letter
// suffix gives the kind of $mask, $base and $offset in that order: 'r' is an
// Int32Regs operand, 'i' is an i32imm operand matched with `imm` in the
// pattern.
15090b57cec5SDimitry Andricdef INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
15100b57cec5SDimitry Andric                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
15110b57cec5SDimitry Andricdef INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
15120b57cec5SDimitry Andric                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
15130b57cec5SDimitry Andricdef INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
15140b57cec5SDimitry Andric                     (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
15150b57cec5SDimitry Andricdef INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
15160b57cec5SDimitry Andric                     (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
15170b57cec5SDimitry Andricdef INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
15180b57cec5SDimitry Andric                     (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
15190b57cec5SDimitry Andricdef INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
15200b57cec5SDimitry Andric                     (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
15210b57cec5SDimitry Andricdef INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
15220b57cec5SDimitry Andric                     (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
15230b57cec5SDimitry Andricdef INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
15240b57cec5SDimitry Andric                     (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;
15250b57cec5SDimitry Andric
15260b57cec5SDimitry Andric//-----------------------------------
15270b57cec5SDimitry Andric// Atomic Functions
15280b57cec5SDimitry Andric//-----------------------------------
15290b57cec5SDimitry Andric
// PatFrag wrappers that attach an address-space predicate to an atomic
// fragment. `ops` and `frag` are forwarded unchanged to PatFrag; the third
// argument is the matching code snippet from AS_match (global, shared or
// generic), defined near the top of this file.
15300b57cec5SDimitry Andricclass ATOMIC_GLOBAL_CHK <dag ops, dag frag>
15310b57cec5SDimitry Andric : PatFrag<ops, frag, AS_match.global>;
15320b57cec5SDimitry Andricclass ATOMIC_SHARED_CHK <dag ops, dag frag>
15330b57cec5SDimitry Andric : PatFrag<ops, frag, AS_match.shared>;
15340b57cec5SDimitry Andricclass ATOMIC_GENERIC_CHK <dag ops, dag frag>
15350b57cec5SDimitry Andric : PatFrag<ops, frag, AS_match.generic>;
15360b57cec5SDimitry Andric
// Two-operand atomic (address plus one data operand) for one pointer width.
//   ptrT / ptrclass : value type and register class of the $addr operand.
//   regT / regclass : value type and register class of $b and of the result.
//   SpaceStr, OpcStr, TypeStr : suffixes pasted after "atom", in that order,
//                               to form the mnemonic.
//   IntOp           : fragment matched by the selection pattern.
//   IMMType / IMM   : operand type and DAG node used by the immediate form.
//   Pred            : predicates required by the generated instructions.
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  // Register form: the data operand $b comes from a register.
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
      Requires<Pred>;

  // Immediate form: the data operand $b is an immediate. When TypeStr is
  // ".f16" or ".bf16" the predicate list is replaced by Predicate<"false">,
  // so this form is never selected for those types.
  def imm
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b),
                "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
      Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")),
                   [Predicate<"false">],
                   Pred)>;
}

// Instantiates F_ATOMIC_2_imp twice: `p32` for i32 addresses held in Int32Regs
// and `p64` for i64 addresses held in Int64Regs. Pred defaults to an empty
// list.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}
15580b57cec5SDimitry Andric
// Two-operand atomic whose data operand is negated before use.
// The emitted asm is a scoped block that declares a scratch register `temp`,
// negates $b into it with `neg.s<TypeStr>`, and issues the atomic on `temp`
// using the `.u<TypeStr>` type. TypeStr is therefore a bare width such as
// "32" or "64", without a leading type letter. Only a register form exists.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "{{ \n\t" #
                ".reg \t.s" # TypeStr # " temp; \n\t" #
                "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
                  " \t$dst, [$addr], temp; \n\t" #
                "}}",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
      Requires<Pred>;
}

// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (`p32`) and 64-bit (`p64`)
// address operands. Pred defaults to an empty list.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}
15810b57cec5SDimitry Andric
// Three-operand atomic (address plus two data operands $b and $c) for one
// pointer width. Four forms are generated, all sharing the same mnemonic and
// the predicate list Pred:
//   reg  : $b and $c are registers
//   imm1 : $b is an immediate, $c a register
//   imm2 : $b is a register, $c an immediate
//   imm3 : $b and $c are both immediates
// Immediate operands use IMMType and are matched with `imm` in the pattern.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                  " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b),
                             (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                  " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             imm:$b,
                             (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                  " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b),
                             imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr #
                  " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             imm:$b,
                             imm:$c))]>,
      Requires<Pred>;
}

// Instantiates F_ATOMIC_3_imp for 32-bit (`p32`) and 64-bit (`p64`) address
// operands. Pred defaults to an empty list.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}
16180b57cec5SDimitry Andric
16190b57cec5SDimitry Andric// atom_add
16200b57cec5SDimitry Andric
// Address-space-qualified fragments for atomic add. The suffix names the
// check that is attached: _g -> ATOMIC_GLOBAL_CHK, _s -> ATOMIC_SHARED_CHK,
// _gen -> ATOMIC_GENERIC_CHK. The _i32/_i64 fragments wrap
// atomic_load_add_i32 / atomic_load_add_i64.
1621*0fca6ea1SDimitry Andricdef atomic_load_add_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1622*0fca6ea1SDimitry Andric  (atomic_load_add_i32 node:$a, node:$b)>;
1623*0fca6ea1SDimitry Andricdef atomic_load_add_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1624*0fca6ea1SDimitry Andric  (atomic_load_add_i32 node:$a, node:$b)>;
1625*0fca6ea1SDimitry Andricdef atomic_load_add_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1626*0fca6ea1SDimitry Andric  (atomic_load_add_i32 node:$a, node:$b)>;
1627*0fca6ea1SDimitry Andricdef atomic_load_add_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1628*0fca6ea1SDimitry Andric  (atomic_load_add_i64 node:$a, node:$b)>;
1629*0fca6ea1SDimitry Andricdef atomic_load_add_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1630*0fca6ea1SDimitry Andric  (atomic_load_add_i64 node:$a, node:$b)>;
1631*0fca6ea1SDimitry Andricdef atomic_load_add_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1632*0fca6ea1SDimitry Andric  (atomic_load_add_i64 node:$a, node:$b)>;
// Despite the unsuffixed names, the next three fragments wrap
// atomic_load_fadd; they are the ones used by the floating-point
// INT_PTX_ATOM_ADD_* definitions.
16330b57cec5SDimitry Andricdef atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
16340b57cec5SDimitry Andric  (atomic_load_fadd node:$a, node:$b)>;
16350b57cec5SDimitry Andricdef atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
16360b57cec5SDimitry Andric  (atomic_load_fadd node:$a, node:$b)>;
16370b57cec5SDimitry Andricdef atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
16380b57cec5SDimitry Andric  (atomic_load_fadd node:$a, node:$b)>;
16390b57cec5SDimitry Andric
// 32-bit integer atomic add: F_ATOMIC_2 instantiations producing
// atom{.global|.shared|<none>}.add.u32 with i32imm/imm for the immediate form.
// The *_GEN_32_USE_G variant matches the generic-address-space fragment
// (atomic_load_add_i32_gen) but emits the ".global" form of the instruction.
16405f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
1641*0fca6ea1SDimitry Andric  atomic_load_add_i32_g, i32imm, imm>;
16425f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
1643*0fca6ea1SDimitry Andric  atomic_load_add_i32_s, i32imm, imm>;
16445f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
1645*0fca6ea1SDimitry Andric  atomic_load_add_i32_gen, i32imm, imm>;
16465f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1647*0fca6ea1SDimitry Andric  ".add", atomic_load_add_i32_gen, i32imm, imm>;
16480b57cec5SDimitry Andric
// 64-bit integer atomic add: same layout with Int64Regs, ".u64" and i64imm.
16495f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
1650*0fca6ea1SDimitry Andric  atomic_load_add_i64_g, i64imm, imm>;
16515f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
1652*0fca6ea1SDimitry Andric  atomic_load_add_i64_s, i64imm, imm>;
16535f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
1654*0fca6ea1SDimitry Andric  atomic_load_add_i64_gen, i64imm, imm>;
16555f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1656*0fca6ea1SDimitry Andric  ".add", atomic_load_add_i64_gen, i64imm, imm>;
1657*0fca6ea1SDimitry Andric
// f16 atomic add: emits atom{.global|.shared|<none>}.add.noftz.f16 on values
// held in Int16Regs, matched through the atomic_load_fadd fragments and gated
// on hasSM<70> and hasPTX<63>. F_ATOMIC_2_imp disables its immediate form for
// the ".f16" type string, so only the register form is selectable.
1658*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<f16, Int16Regs, ".global", ".f16", ".add.noftz",
1659*0fca6ea1SDimitry Andric  atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1660*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<f16, Int16Regs, ".shared", ".f16", ".add.noftz",
1661*0fca6ea1SDimitry Andric  atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1662*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<f16, Int16Regs, "", ".f16", ".add.noftz",
1663*0fca6ea1SDimitry Andric  atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
1664*0fca6ea1SDimitry Andric
// bf16 atomic add: same shape with the ".bf16" type string (immediate form
// likewise disabled) and gated on hasSM<90> and hasPTX<78>.
1665*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
1666*0fca6ea1SDimitry Andric  atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
1667*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
1668*0fca6ea1SDimitry Andric  atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
1669*0fca6ea1SDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<bf16, Int16Regs, "", ".bf16", ".add.noftz",
1670*0fca6ea1SDimitry Andric  atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
16710b57cec5SDimitry Andric
// f32 atomic add: atom{.global|.shared|<none>}.add.f32 on Float32Regs, matched
// through the atomic_load_fadd fragments, with f32imm/fpimm for the immediate
// form. No extra predicates are passed.
16725f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
16730b57cec5SDimitry Andric  atomic_load_add_g, f32imm, fpimm>;
16745f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
16750b57cec5SDimitry Andric  atomic_load_add_s, f32imm, fpimm>;
16765f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
16770b57cec5SDimitry Andric  atomic_load_add_gen, f32imm, fpimm>;
16780b57cec5SDimitry Andric
// f64 atomic add: same shape on Float64Regs with ".f64" and f64imm, gated on
// the hasAtomAddF64 predicate.
16795f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
16800b57cec5SDimitry Andric  atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
16815f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
16820b57cec5SDimitry Andric  atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
16835f757f3fSDimitry Andricdefm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
16840b57cec5SDimitry Andric  atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;
16850b57cec5SDimitry Andric
16860b57cec5SDimitry Andric// atom_sub
16870b57cec5SDimitry Andric
// Address-space-qualified fragments for atomic subtract, wrapping
// atomic_load_sub_i32 / atomic_load_sub_i64 with the global (_g), shared (_s)
// or generic (_gen) address-space check.
1688*0fca6ea1SDimitry Andricdef atomic_load_sub_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1689*0fca6ea1SDimitry Andric  (atomic_load_sub_i32 node:$a, node:$b)>;
1690*0fca6ea1SDimitry Andricdef atomic_load_sub_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1691*0fca6ea1SDimitry Andric  (atomic_load_sub_i32 node:$a, node:$b)>;
1692*0fca6ea1SDimitry Andricdef atomic_load_sub_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1693*0fca6ea1SDimitry Andric  (atomic_load_sub_i32 node:$a, node:$b)>;
1694*0fca6ea1SDimitry Andricdef atomic_load_sub_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1695*0fca6ea1SDimitry Andric  (atomic_load_sub_i64 node:$a, node:$b)>;
1696*0fca6ea1SDimitry Andricdef atomic_load_sub_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1697*0fca6ea1SDimitry Andric  (atomic_load_sub_i64 node:$a, node:$b)>;
1698*0fca6ea1SDimitry Andricdef atomic_load_sub_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1699*0fca6ea1SDimitry Andric  (atomic_load_sub_i64 node:$a, node:$b)>;
17000b57cec5SDimitry Andric
// Atomic subtract is built from F_ATOMIC_2_NEG with the ".add" opcode: the
// operand is negated into a scratch register and then atomically added.
// TypeStr is the bare width ("32"/"64") because F_ATOMIC_2_NEG_imp prepends
// ".s"/".u" itself. The *_USE_G variants match the generic-address-space
// fragment but emit the ".global" form.
17015f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
1702*0fca6ea1SDimitry Andric  atomic_load_sub_i32_g>;
17035f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
1704*0fca6ea1SDimitry Andric  atomic_load_sub_i64_g>;
17055f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
1706*0fca6ea1SDimitry Andric  atomic_load_sub_i32_gen>;
17075f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32",
1708*0fca6ea1SDimitry Andric  ".add", atomic_load_sub_i32_gen>;
17095f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
1710*0fca6ea1SDimitry Andric  atomic_load_sub_i32_s>;
17115f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
1712*0fca6ea1SDimitry Andric  atomic_load_sub_i64_s>;
17135f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
1714*0fca6ea1SDimitry Andric  atomic_load_sub_i64_gen>;
17155f757f3fSDimitry Andricdefm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64",
1716*0fca6ea1SDimitry Andric  ".add", atomic_load_sub_i64_gen>;
17170b57cec5SDimitry Andric
17180b57cec5SDimitry Andric// atom_swap
17190b57cec5SDimitry Andric
// Address-space-qualified fragments for atomic exchange, wrapping
// atomic_swap_i32 / atomic_swap_i64 with the global (_g), shared (_s) or
// generic (_gen) address-space check.
1720*0fca6ea1SDimitry Andricdef atomic_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1721*0fca6ea1SDimitry Andric  (atomic_swap_i32 node:$a, node:$b)>;
1722*0fca6ea1SDimitry Andricdef atomic_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1723*0fca6ea1SDimitry Andric  (atomic_swap_i32 node:$a, node:$b)>;
1724*0fca6ea1SDimitry Andricdef atomic_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1725*0fca6ea1SDimitry Andric  (atomic_swap_i32 node:$a, node:$b)>;
1726*0fca6ea1SDimitry Andricdef atomic_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1727*0fca6ea1SDimitry Andric  (atomic_swap_i64 node:$a, node:$b)>;
1728*0fca6ea1SDimitry Andricdef atomic_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1729*0fca6ea1SDimitry Andric  (atomic_swap_i64 node:$a, node:$b)>;
1730*0fca6ea1SDimitry Andricdef atomic_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1731*0fca6ea1SDimitry Andric  (atomic_swap_i64 node:$a, node:$b)>;
17320b57cec5SDimitry Andric
// Atomic exchange: F_ATOMIC_2 instantiations producing
// atom{.global|.shared|<none>}.exch.b32 / .b64, with i32imm or i64imm for the
// immediate form. The *_USE_G variants match the generic-address-space
// fragment but emit the ".global" form.
17335f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
1734*0fca6ea1SDimitry Andric  atomic_swap_i32_g, i32imm, imm>;
17355f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
1736*0fca6ea1SDimitry Andric  atomic_swap_i32_s, i32imm, imm>;
17375f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
1738*0fca6ea1SDimitry Andric  atomic_swap_i32_gen, i32imm, imm>;
17395f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1740*0fca6ea1SDimitry Andric  ".exch", atomic_swap_i32_gen, i32imm, imm>;
17415f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
1742*0fca6ea1SDimitry Andric  atomic_swap_i64_g, i64imm, imm>;
17435f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
1744*0fca6ea1SDimitry Andric  atomic_swap_i64_s, i64imm, imm>;
17455f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
1746*0fca6ea1SDimitry Andric  atomic_swap_i64_gen, i64imm, imm>;
17475f757f3fSDimitry Andricdefm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1748*0fca6ea1SDimitry Andric  ".exch", atomic_swap_i64_gen, i64imm, imm>;
17490b57cec5SDimitry Andric
17500b57cec5SDimitry Andric// atom_max
17510b57cec5SDimitry Andric
// Address-space-qualified fragments for atomic max. The atomic_load_max_*
// fragments wrap atomic_load_max_i32 / _i64 and the atomic_load_umax_*
// fragments wrap atomic_load_umax_i32 / _i64, each with the global (_g),
// shared (_s) or generic (_gen) address-space check.
1752*0fca6ea1SDimitry Andricdef atomic_load_max_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1753*0fca6ea1SDimitry Andric  , (atomic_load_max_i32 node:$a, node:$b)>;
1754*0fca6ea1SDimitry Andricdef atomic_load_max_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1755*0fca6ea1SDimitry Andric  (atomic_load_max_i32 node:$a, node:$b)>;
1756*0fca6ea1SDimitry Andricdef atomic_load_max_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1757*0fca6ea1SDimitry Andric  (atomic_load_max_i32 node:$a, node:$b)>;
1758*0fca6ea1SDimitry Andricdef atomic_load_max_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b)
1759*0fca6ea1SDimitry Andric  , (atomic_load_max_i64 node:$a, node:$b)>;
1760*0fca6ea1SDimitry Andricdef atomic_load_max_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1761*0fca6ea1SDimitry Andric  (atomic_load_max_i64 node:$a, node:$b)>;
1762*0fca6ea1SDimitry Andricdef atomic_load_max_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1763*0fca6ea1SDimitry Andric  (atomic_load_max_i64 node:$a, node:$b)>;
1764*0fca6ea1SDimitry Andricdef atomic_load_umax_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1765*0fca6ea1SDimitry Andric  (atomic_load_umax_i32 node:$a, node:$b)>;
1766*0fca6ea1SDimitry Andricdef atomic_load_umax_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1767*0fca6ea1SDimitry Andric  (atomic_load_umax_i32 node:$a, node:$b)>;
1768*0fca6ea1SDimitry Andricdef atomic_load_umax_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1769*0fca6ea1SDimitry Andric  (atomic_load_umax_i32 node:$a, node:$b)>;
1770*0fca6ea1SDimitry Andricdef atomic_load_umax_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1771*0fca6ea1SDimitry Andric  (atomic_load_umax_i64 node:$a, node:$b)>;
1772*0fca6ea1SDimitry Andricdef atomic_load_umax_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1773*0fca6ea1SDimitry Andric  (atomic_load_umax_i64 node:$a, node:$b)>;
1774*0fca6ea1SDimitry Andricdef atomic_load_umax_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1775*0fca6ea1SDimitry Andric  (atomic_load_umax_i64 node:$a, node:$b)>;
17760b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".max" op string. The signed fragments
// (atomic_load_max_*) are paired with the ".s32"/".s64" type strings and the
// unsigned ones (atomic_load_umax_*) with ".u32"/".u64". Every 64-bit record
// passes [hasSM<32>] as a trailing predicate list; the 32-bit records pass
// none. The *_USE_G records reuse the generic (_gen) fragment with the
// ".global" space string.
17775f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1778*0fca6ea1SDimitry Andric  ".max", atomic_load_max_i32_g, i32imm, imm>;
17795f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1780*0fca6ea1SDimitry Andric  ".max", atomic_load_max_i32_s, i32imm, imm>;
17815f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
1782*0fca6ea1SDimitry Andric  atomic_load_max_i32_gen, i32imm, imm>;
17835f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1784*0fca6ea1SDimitry Andric  ".s32", ".max", atomic_load_max_i32_gen, i32imm, imm>;
17855f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1786*0fca6ea1SDimitry Andric  ".max", atomic_load_max_i64_g, i64imm, imm, [hasSM<32>]>;
17875f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1788*0fca6ea1SDimitry Andric  ".max", atomic_load_max_i64_s, i64imm, imm, [hasSM<32>]>;
17895f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
1790*0fca6ea1SDimitry Andric  atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
17915f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1792*0fca6ea1SDimitry Andric  ".s64", ".max", atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
17935f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1794*0fca6ea1SDimitry Andric  ".max", atomic_load_umax_i32_g, i32imm, imm>;
17955f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1796*0fca6ea1SDimitry Andric  ".max", atomic_load_umax_i32_s, i32imm, imm>;
17975f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
1798*0fca6ea1SDimitry Andric  atomic_load_umax_i32_gen, i32imm, imm>;
17995f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1800*0fca6ea1SDimitry Andric  ".u32", ".max", atomic_load_umax_i32_gen, i32imm, imm>;
18015f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1802*0fca6ea1SDimitry Andric  ".max", atomic_load_umax_i64_g, i64imm, imm, [hasSM<32>]>;
18035f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1804*0fca6ea1SDimitry Andric  ".max", atomic_load_umax_i64_s, i64imm, imm, [hasSM<32>]>;
18055f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
1806*0fca6ea1SDimitry Andric  atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
18075f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1808*0fca6ea1SDimitry Andric  ".u64", ".max", atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
18090b57cec5SDimitry Andric
18100b57cec5SDimitry Andric// atom_min
18110b57cec5SDimitry Andric
// Two-operand ($a, $b) wrappers around the signed (atomic_load_min_*) and
// unsigned (atomic_load_umin_*) min fragments, for i32 and i64. The _g, _s
// and _gen suffixes use ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and
// ATOMIC_GENERIC_CHK respectively. Those classes are defined outside this
// view; presumably each adds an address-space check -- TODO confirm.
1812*0fca6ea1SDimitry Andricdef atomic_load_min_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1813*0fca6ea1SDimitry Andric  (atomic_load_min_i32 node:$a, node:$b)>;
1814*0fca6ea1SDimitry Andricdef atomic_load_min_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1815*0fca6ea1SDimitry Andric  (atomic_load_min_i32 node:$a, node:$b)>;
1816*0fca6ea1SDimitry Andricdef atomic_load_min_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1817*0fca6ea1SDimitry Andric  (atomic_load_min_i32 node:$a, node:$b)>;
1818*0fca6ea1SDimitry Andricdef atomic_load_min_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1819*0fca6ea1SDimitry Andric  (atomic_load_min_i64 node:$a, node:$b)>;
1820*0fca6ea1SDimitry Andricdef atomic_load_min_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1821*0fca6ea1SDimitry Andric  (atomic_load_min_i64 node:$a, node:$b)>;
1822*0fca6ea1SDimitry Andricdef atomic_load_min_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1823*0fca6ea1SDimitry Andric  (atomic_load_min_i64 node:$a, node:$b)>;
1824*0fca6ea1SDimitry Andricdef atomic_load_umin_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1825*0fca6ea1SDimitry Andric  (atomic_load_umin_i32 node:$a, node:$b)>;
1826*0fca6ea1SDimitry Andricdef atomic_load_umin_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1827*0fca6ea1SDimitry Andric  (atomic_load_umin_i32 node:$a, node:$b)>;
1828*0fca6ea1SDimitry Andricdef atomic_load_umin_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1829*0fca6ea1SDimitry Andric  (atomic_load_umin_i32 node:$a, node:$b)>;
1830*0fca6ea1SDimitry Andricdef atomic_load_umin_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1831*0fca6ea1SDimitry Andric  (atomic_load_umin_i64 node:$a, node:$b)>;
1832*0fca6ea1SDimitry Andricdef atomic_load_umin_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1833*0fca6ea1SDimitry Andric  (atomic_load_umin_i64 node:$a, node:$b)>;
1834*0fca6ea1SDimitry Andricdef atomic_load_umin_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1835*0fca6ea1SDimitry Andric  (atomic_load_umin_i64 node:$a, node:$b)>;
18360b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".min" op string. The signed fragments
// (atomic_load_min_*) are paired with the ".s32"/".s64" type strings and the
// unsigned ones (atomic_load_umin_*) with ".u32"/".u64". Every 64-bit record
// passes [hasSM<32>] as a trailing predicate list; the 32-bit records pass
// none. The *_USE_G records reuse the generic (_gen) fragment with the
// ".global" space string.
18375f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32",
1838*0fca6ea1SDimitry Andric  ".min", atomic_load_min_i32_g, i32imm, imm>;
18395f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32",
1840*0fca6ea1SDimitry Andric  ".min", atomic_load_min_i32_s, i32imm, imm>;
18415f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
1842*0fca6ea1SDimitry Andric  atomic_load_min_i32_gen, i32imm, imm>;
18435f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1844*0fca6ea1SDimitry Andric  ".s32", ".min", atomic_load_min_i32_gen, i32imm, imm>;
18455f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64",
1846*0fca6ea1SDimitry Andric  ".min", atomic_load_min_i64_g, i64imm, imm, [hasSM<32>]>;
18475f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64",
1848*0fca6ea1SDimitry Andric  ".min", atomic_load_min_i64_s, i64imm, imm, [hasSM<32>]>;
18495f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
1850*0fca6ea1SDimitry Andric  atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
18515f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1852*0fca6ea1SDimitry Andric  ".s64", ".min", atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
18535f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
1854*0fca6ea1SDimitry Andric  ".min", atomic_load_umin_i32_g, i32imm, imm>;
18555f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32",
1856*0fca6ea1SDimitry Andric  ".min", atomic_load_umin_i32_s, i32imm, imm>;
18575f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
1858*0fca6ea1SDimitry Andric  atomic_load_umin_i32_gen, i32imm, imm>;
18595f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global",
1860*0fca6ea1SDimitry Andric  ".u32", ".min", atomic_load_umin_i32_gen, i32imm, imm>;
18615f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64",
1862*0fca6ea1SDimitry Andric  ".min", atomic_load_umin_i64_g, i64imm, imm, [hasSM<32>]>;
18635f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64",
1864*0fca6ea1SDimitry Andric  ".min", atomic_load_umin_i64_s, i64imm, imm, [hasSM<32>]>;
18655f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
1866*0fca6ea1SDimitry Andric  atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
18675f757f3fSDimitry Andricdefm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global",
1868*0fca6ea1SDimitry Andric  ".u64", ".min", atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
18690b57cec5SDimitry Andric
18700b57cec5SDimitry Andric// atom_inc  atom_dec
18710b57cec5SDimitry Andric
// Two-operand ($a, $b) wrappers around the int_nvvm_atomic_load_inc_32 and
// int_nvvm_atomic_load_dec_32 intrinsics, one per ATOMIC_GLOBAL_CHK /
// ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK class. Only 32-bit fragments are
// defined here. The *_CHK classes are declared outside this view.
18720b57cec5SDimitry Andricdef atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
18730b57cec5SDimitry Andric  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
18740b57cec5SDimitry Andricdef atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
18750b57cec5SDimitry Andric  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
18760b57cec5SDimitry Andricdef atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
18770b57cec5SDimitry Andric  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
18780b57cec5SDimitry Andricdef atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
18790b57cec5SDimitry Andric  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
18800b57cec5SDimitry Andricdef atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
18810b57cec5SDimitry Andric  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
18820b57cec5SDimitry Andricdef atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
18830b57cec5SDimitry Andric  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
18840b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".inc" and ".dec" op strings. All records
// are 32-bit (i32 / Int32Regs / i32imm) with the ".u32" type string, and none
// passes a predicate list. The *_USE_G records reuse the generic (_gen)
// fragment with the ".global" space string.
18855f757f3fSDimitry Andricdefm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
18860b57cec5SDimitry Andric  atomic_load_inc_32_g, i32imm, imm>;
18875f757f3fSDimitry Andricdefm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
18880b57cec5SDimitry Andric  atomic_load_inc_32_s, i32imm, imm>;
18895f757f3fSDimitry Andricdefm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
18900b57cec5SDimitry Andric  atomic_load_inc_32_gen, i32imm, imm>;
18915f757f3fSDimitry Andricdefm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
18920b57cec5SDimitry Andric  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
18935f757f3fSDimitry Andricdefm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
18940b57cec5SDimitry Andric  atomic_load_dec_32_g, i32imm, imm>;
18955f757f3fSDimitry Andricdefm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
18960b57cec5SDimitry Andric  atomic_load_dec_32_s, i32imm, imm>;
18975f757f3fSDimitry Andricdefm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
18980b57cec5SDimitry Andric  atomic_load_dec_32_gen, i32imm, imm>;
18995f757f3fSDimitry Andricdefm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32",
19000b57cec5SDimitry Andric  ".dec", atomic_load_dec_32_gen, i32imm, imm>;
19010b57cec5SDimitry Andric
19020b57cec5SDimitry Andric// atom_and
19030b57cec5SDimitry Andric
// Two-operand ($a, $b) wrappers around atomic_load_and_i32 / _i64, one per
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK class (suffixes
// _g, _s, _gen). The *_CHK classes are declared outside this view.
1904*0fca6ea1SDimitry Andricdef atomic_load_and_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1905*0fca6ea1SDimitry Andric  (atomic_load_and_i32 node:$a, node:$b)>;
1906*0fca6ea1SDimitry Andricdef atomic_load_and_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1907*0fca6ea1SDimitry Andric  (atomic_load_and_i32 node:$a, node:$b)>;
1908*0fca6ea1SDimitry Andricdef atomic_load_and_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1909*0fca6ea1SDimitry Andric  (atomic_load_and_i32 node:$a, node:$b)>;
1910*0fca6ea1SDimitry Andricdef atomic_load_and_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1911*0fca6ea1SDimitry Andric  (atomic_load_and_i64 node:$a, node:$b)>;
1912*0fca6ea1SDimitry Andricdef atomic_load_and_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1913*0fca6ea1SDimitry Andric  (atomic_load_and_i64 node:$a, node:$b)>;
1914*0fca6ea1SDimitry Andricdef atomic_load_and_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1915*0fca6ea1SDimitry Andric  (atomic_load_and_i64 node:$a, node:$b)>;
19160b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".and" op string with the ".b32"/".b64"
// type strings. The 64-bit records pass [hasSM<32>] as a trailing predicate
// list; the 32-bit records pass none. The *_USE_G records reuse the generic
// (_gen) fragment with the ".global" space string.
19175f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
1918*0fca6ea1SDimitry Andric  atomic_load_and_i32_g, i32imm, imm>;
19195f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
1920*0fca6ea1SDimitry Andric  atomic_load_and_i32_s, i32imm, imm>;
19215f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
1922*0fca6ea1SDimitry Andric  atomic_load_and_i32_gen, i32imm, imm>;
19235f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1924*0fca6ea1SDimitry Andric  ".and", atomic_load_and_i32_gen, i32imm, imm>;
19255f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
1926*0fca6ea1SDimitry Andric  atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>;
19275f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
1928*0fca6ea1SDimitry Andric  atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>;
19295f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
1930*0fca6ea1SDimitry Andric  atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
19315f757f3fSDimitry Andricdefm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1932*0fca6ea1SDimitry Andric  ".and", atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>;
19330b57cec5SDimitry Andric
19340b57cec5SDimitry Andric// atom_or
19350b57cec5SDimitry Andric
// Two-operand ($a, $b) wrappers around atomic_load_or_i32 / _i64, one per
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK class (suffixes
// _g, _s, _gen). The *_CHK classes are declared outside this view.
1936*0fca6ea1SDimitry Andricdef atomic_load_or_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1937*0fca6ea1SDimitry Andric  (atomic_load_or_i32 node:$a, node:$b)>;
1938*0fca6ea1SDimitry Andricdef atomic_load_or_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1939*0fca6ea1SDimitry Andric  (atomic_load_or_i32 node:$a, node:$b)>;
1940*0fca6ea1SDimitry Andricdef atomic_load_or_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1941*0fca6ea1SDimitry Andric  (atomic_load_or_i32 node:$a, node:$b)>;
1942*0fca6ea1SDimitry Andricdef atomic_load_or_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1943*0fca6ea1SDimitry Andric  (atomic_load_or_i64 node:$a, node:$b)>;
1944*0fca6ea1SDimitry Andricdef atomic_load_or_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1945*0fca6ea1SDimitry Andric  (atomic_load_or_i64 node:$a, node:$b)>;
1946*0fca6ea1SDimitry Andricdef atomic_load_or_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1947*0fca6ea1SDimitry Andric  (atomic_load_or_i64 node:$a, node:$b)>;
19480b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".or" op string with the ".b32"/".b64"
// type strings. The 64-bit records pass [hasSM<32>] as a trailing predicate
// list; the 32-bit records pass none. Within each width the records are
// listed as G, GEN, GEN_*_USE_G, then S. The *_USE_G records reuse the
// generic (_gen) fragment with the ".global" space string.
19495f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
1950*0fca6ea1SDimitry Andric  atomic_load_or_i32_g, i32imm, imm>;
19515f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
1952*0fca6ea1SDimitry Andric  atomic_load_or_i32_gen, i32imm, imm>;
19535f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1954*0fca6ea1SDimitry Andric  ".or", atomic_load_or_i32_gen, i32imm, imm>;
19555f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
1956*0fca6ea1SDimitry Andric  atomic_load_or_i32_s, i32imm, imm>;
19575f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
1958*0fca6ea1SDimitry Andric  atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>;
19595f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
1960*0fca6ea1SDimitry Andric  atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
19615f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1962*0fca6ea1SDimitry Andric  ".or", atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>;
19635f757f3fSDimitry Andricdefm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
1964*0fca6ea1SDimitry Andric  atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>;
19650b57cec5SDimitry Andric
19660b57cec5SDimitry Andric// atom_xor
19670b57cec5SDimitry Andric
// Two-operand ($a, $b) wrappers around atomic_load_xor_i32 / _i64, one per
// ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK class (suffixes
// _g, _s, _gen). The *_CHK classes are declared outside this view.
1968*0fca6ea1SDimitry Andricdef atomic_load_xor_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1969*0fca6ea1SDimitry Andric  (atomic_load_xor_i32 node:$a, node:$b)>;
1970*0fca6ea1SDimitry Andricdef atomic_load_xor_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1971*0fca6ea1SDimitry Andric  (atomic_load_xor_i32 node:$a, node:$b)>;
1972*0fca6ea1SDimitry Andricdef atomic_load_xor_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1973*0fca6ea1SDimitry Andric  (atomic_load_xor_i32 node:$a, node:$b)>;
1974*0fca6ea1SDimitry Andricdef atomic_load_xor_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1975*0fca6ea1SDimitry Andric  (atomic_load_xor_i64 node:$a, node:$b)>;
1976*0fca6ea1SDimitry Andricdef atomic_load_xor_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1977*0fca6ea1SDimitry Andric  (atomic_load_xor_i64 node:$a, node:$b)>;
1978*0fca6ea1SDimitry Andricdef atomic_load_xor_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1979*0fca6ea1SDimitry Andric  (atomic_load_xor_i64 node:$a, node:$b)>;
19800b57cec5SDimitry Andric
// F_ATOMIC_2 instantiations for the ".xor" op string with the ".b32"/".b64"
// type strings. The 64-bit records pass [hasSM<32>] as a trailing predicate
// list; the 32-bit records pass none. The *_USE_G records reuse the generic
// (_gen) fragment with the ".global" space string.
19815f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
1982*0fca6ea1SDimitry Andric  atomic_load_xor_i32_g, i32imm, imm>;
19835f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
1984*0fca6ea1SDimitry Andric  atomic_load_xor_i32_s, i32imm, imm>;
19855f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
1986*0fca6ea1SDimitry Andric  atomic_load_xor_i32_gen, i32imm, imm>;
19875f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
1988*0fca6ea1SDimitry Andric  ".xor", atomic_load_xor_i32_gen, i32imm, imm>;
19895f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
1990*0fca6ea1SDimitry Andric  atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>;
19915f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
1992*0fca6ea1SDimitry Andric  atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>;
19935f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
1994*0fca6ea1SDimitry Andric  atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
19955f757f3fSDimitry Andricdefm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64",
1996*0fca6ea1SDimitry Andric  ".xor", atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>;
19970b57cec5SDimitry Andric
19980b57cec5SDimitry Andric// atom_cas
19990b57cec5SDimitry Andric
// Three-operand ($a, $b, $c) wrappers around atomic_cmp_swap_i32 / _i64, one
// per ATOMIC_GLOBAL_CHK / ATOMIC_SHARED_CHK / ATOMIC_GENERIC_CHK class
// (suffixes _g, _s, _gen). The *_CHK classes are declared outside this view.
2000*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2001*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2002*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2003*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2004*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2005*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>;
2006*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
2007*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2008*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
2009*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
2010*0fca6ea1SDimitry Andricdef atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
2011*0fca6ea1SDimitry Andric  (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>;
20120b57cec5SDimitry Andric
// F_ATOMIC_3 instantiations for the ".cas" op string with the ".b32"/".b64"
// type strings. Unlike the F_ATOMIC_2 uses in this file, only an immediate
// operand type (i32imm / i64imm) is passed after the fragment -- there is no
// `imm` node argument -- and no record passes a predicate list. The *_USE_G
// records reuse the generic (_gen) fragment with the ".global" space string.
// F_ATOMIC_3 is defined outside this view.
20135f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
2014*0fca6ea1SDimitry Andric  atomic_cmp_swap_i32_g, i32imm>;
20155f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
2016*0fca6ea1SDimitry Andric  atomic_cmp_swap_i32_s, i32imm>;
20175f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
2018*0fca6ea1SDimitry Andric  atomic_cmp_swap_i32_gen, i32imm>;
20195f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32",
2020*0fca6ea1SDimitry Andric  ".cas", atomic_cmp_swap_i32_gen, i32imm>;
20215f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
2022*0fca6ea1SDimitry Andric  atomic_cmp_swap_i64_g, i64imm>;
20235f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
2024*0fca6ea1SDimitry Andric  atomic_cmp_swap_i64_s, i64imm>;
20255f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
2026*0fca6ea1SDimitry Andric  atomic_cmp_swap_i64_gen, i64imm>;
20275f757f3fSDimitry Andricdefm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64",
2028*0fca6ea1SDimitry Andric  ".cas", atomic_cmp_swap_i64_gen, i64imm>;
20290b57cec5SDimitry Andric
20300b57cec5SDimitry Andric// Support for scoped atomic operations.  Matches
20310b57cec5SDimitry Andric// int_nvvm_atomic_{op}_{space}_{type}_{scope}
20320b57cec5SDimitry Andric// and converts it into the appropriate instruction.
20330b57cec5SDimitry Andric// NOTE: not all possible combinations are implemented
20340b57cec5SDimitry Andric//  'space' is limited to generic as it's the only one needed to support CUDA.
20350b57cec5SDimitry Andric//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
// Base record shared by the two- and three-operand scoped atomics below.
//   AsmStr   - assembly string of the instruction.
//   regT     - value type given to $result in the selection pattern.
//   regclass - register class of the single output operand $result.
//   Preds    - predicate list attached through Requires<>.
//   ins      - input operand dag of the instruction.
//   Operands - dag whose value the pattern assigns to $result.
20365f757f3fSDimitry Andricclass ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
20370b57cec5SDimitry Andric                  dag ins, dag Operands>
20380b57cec5SDimitry Andric      : NVPTXInst<(outs regclass:$result), ins,
20390b57cec5SDimitry Andric                  AsmStr,
20405f757f3fSDimitry Andric                  [(set (regT regclass:$result), Operands)]>,
20410b57cec5SDimitry Andric        Requires<Preds>;
20420b57cec5SDimitry Andric
20430b57cec5SDimitry Andric// Define instruction variants for all addressing modes.
// Two-operand form: an address ($src) plus one value ($b), matched against
// the intrinsic Intr. For each address register class (Int16Regs, Int32Regs,
// Int64Regs) this emits two anonymous ATOM23_impl records: one taking $b in
// `regclass` and one taking $b as an ImmType immediate. The register forms
// are wrapped in `AddedComplexity = 1`; the immediate forms are left at the
// default complexity.
20440b57cec5SDimitry Andricmulticlass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
20455f757f3fSDimitry Andric                       ValueType regT, NVPTXRegClass regclass, Operand ImmType,
20460b57cec5SDimitry Andric                       SDNode Imm, ValueType ImmTy,
20470b57cec5SDimitry Andric                       list<Predicate> Preds> {
20480b57cec5SDimitry Andric  let AddedComplexity = 1 in {
20495f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2050*0fca6ea1SDimitry Andric                      (ins Int16Regs:$src, regclass:$b),
2051*0fca6ea1SDimitry Andric                      (Intr (i16 Int16Regs:$src), (regT regclass:$b))>;
2052*0fca6ea1SDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20530b57cec5SDimitry Andric                      (ins Int32Regs:$src, regclass:$b),
20545f757f3fSDimitry Andric                      (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
20555f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20560b57cec5SDimitry Andric                      (ins Int64Regs:$src, regclass:$b),
20575f757f3fSDimitry Andric                      (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
20580b57cec5SDimitry Andric  }
20590b57cec5SDimitry Andric  // tablegen can't infer argument types from Intrinsic (though it can
20600b57cec5SDimitry Andric  // from Instruction) so we have to enforce specific type on
20610b57cec5SDimitry Andric  // immediates via explicit cast to ImmTy.
20625f757f3fSDimitry Andric  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
2063*0fca6ea1SDimitry Andric                    (ins Int16Regs:$src, ImmType:$b),
2064*0fca6ea1SDimitry Andric                    (Intr (i16 Int16Regs:$src), (ImmTy Imm:$b))>;
2065*0fca6ea1SDimitry Andric  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20660b57cec5SDimitry Andric                    (ins Int32Regs:$src, ImmType:$b),
20675f757f3fSDimitry Andric                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
20685f757f3fSDimitry Andric  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20690b57cec5SDimitry Andric                    (ins Int64Regs:$src, ImmType:$b),
20705f757f3fSDimitry Andric                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
20710b57cec5SDimitry Andric}
20720b57cec5SDimitry Andric
// Three-operand form: an address ($src) plus two values ($b, $c), matched
// against the intrinsic Intr. For each address register class used here
// (Int32Regs and Int64Regs; there is no Int16Regs form in this multiclass)
// it emits all four register/immediate combinations of $b and $c as
// anonymous ATOM23_impl records:
//   $b reg, $c reg  -> AddedComplexity = 2
//   $b imm, $c reg  -> AddedComplexity = 1
//   $b reg, $c imm  -> AddedComplexity = 1
//   $b imm, $c imm  -> default complexity
// Immediates are cast to ImmTy in the pattern.
20730b57cec5SDimitry Andricmulticlass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
20745f757f3fSDimitry Andric                       ValueType regT, NVPTXRegClass regclass,
20755f757f3fSDimitry Andric                       Operand ImmType, SDNode Imm, ValueType ImmTy,
20760b57cec5SDimitry Andric                       list<Predicate> Preds> {
20770b57cec5SDimitry Andric  // Variants for register/immediate permutations of $b and $c
20780b57cec5SDimitry Andric  let AddedComplexity = 2 in {
20795f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20800b57cec5SDimitry Andric                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
20815f757f3fSDimitry Andric                      (Intr (i32 Int32Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
20825f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20830b57cec5SDimitry Andric                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
20845f757f3fSDimitry Andric                      (Intr (i64 Int64Regs:$src), (regT regclass:$b), (regT regclass:$c))>;
20850b57cec5SDimitry Andric  }
20860b57cec5SDimitry Andric  let AddedComplexity = 1 in {
20875f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20880b57cec5SDimitry Andric                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
20895f757f3fSDimitry Andric                      (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
20905f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20910b57cec5SDimitry Andric                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
20925f757f3fSDimitry Andric                      (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (regT regclass:$c))>;
20935f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20940b57cec5SDimitry Andric                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
20955f757f3fSDimitry Andric                      (Intr (i32 Int32Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
20965f757f3fSDimitry Andric    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
20970b57cec5SDimitry Andric                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
20985f757f3fSDimitry Andric                      (Intr (i64 Int64Regs:$src), (regT regclass:$b), (ImmTy Imm:$c))>;
20990b57cec5SDimitry Andric  }
21005f757f3fSDimitry Andric  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
21010b57cec5SDimitry Andric                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
21025f757f3fSDimitry Andric                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
21035f757f3fSDimitry Andric  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
21040b57cec5SDimitry Andric                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
21055f757f3fSDimitry Andric                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
21060b57cec5SDimitry Andric}
21070b57cec5SDimitry Andric
210881ad6265SDimitry Andric// Constructs intrinsic name and instruction asm strings.
// Two-operand name builder; forwards everything else to ATOM2P_impl.
//   Asm string: "atom", then "." # SpaceStr unless SpaceStr is "gen", then
//               "." # ScopeStr unless ScopeStr is "gpu", then
//               "." # OpStr # "." # TypeStr # " \t$result, [$src], $b;".
//   Intrinsic:  the record named "int_nvvm_atomic_" # OpStr # "_" # SpaceStr
//               # "_" # IntTypeStr, followed by "_" # ScopeStr unless
//               ScopeStr is empty; looked up with !cast<Intrinsic>.
// The two scope conditions differ: the asm string drops a "gpu" scope, while
// the intrinsic name drops only an empty one.
21090b57cec5SDimitry Andricmulticlass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
21100b57cec5SDimitry Andric                       string ScopeStr, string SpaceStr,
21115f757f3fSDimitry Andric                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
21120b57cec5SDimitry Andric                       ValueType ImmTy, list<Predicate> Preds> {
21130b57cec5SDimitry Andric  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
21140b57cec5SDimitry Andric                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
21150b57cec5SDimitry Andric                            # "." # OpStr # "." # TypeStr
21160b57cec5SDimitry Andric                            # " \t$result, [$src], $b;",
21170b57cec5SDimitry Andric                     !cast<Intrinsic>(
21180b57cec5SDimitry Andric                            "int_nvvm_atomic_" # OpStr
21190b57cec5SDimitry Andric                            # "_" # SpaceStr # "_" # IntTypeStr
2120e8d8bef9SDimitry Andric                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
21215f757f3fSDimitry Andric                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
21220b57cec5SDimitry Andric}
// Three-operand name builder; forwards everything else to ATOM3P_impl.
//   Asm string: "atom", then "." # SpaceStr unless SpaceStr is "gen", then
//               "." # ScopeStr unless ScopeStr is "gpu", then
//               "." # OpStr # "." # TypeStr # " \t$result, [$src], $b, $c;".
//   Intrinsic:  the record named "int_nvvm_atomic_" # OpStr # "_" # SpaceStr
//               # "_" # IntTypeStr, followed by "_" # ScopeStr unless
//               ScopeStr is empty; looked up with !cast<Intrinsic>.
// The two scope conditions differ: the asm string drops a "gpu" scope, while
// the intrinsic name drops only an empty one.
21230b57cec5SDimitry Andricmulticlass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
21240b57cec5SDimitry Andric                       string ScopeStr, string SpaceStr,
21255f757f3fSDimitry Andric                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
21260b57cec5SDimitry Andric                       ValueType ImmTy, list<Predicate> Preds> {
21270b57cec5SDimitry Andric  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
21280b57cec5SDimitry Andric                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
21290b57cec5SDimitry Andric                            # "." # OpStr # "." # TypeStr
21300b57cec5SDimitry Andric                            # " \t$result, [$src], $b, $c;",
21310b57cec5SDimitry Andric                     !cast<Intrinsic>(
21320b57cec5SDimitry Andric                            "int_nvvm_atomic_" # OpStr
21330b57cec5SDimitry Andric                            # "_" # SpaceStr # "_" # IntTypeStr
2134e8d8bef9SDimitry Andric                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
21355f757f3fSDimitry Andric                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
21360b57cec5SDimitry Andric}
21370b57cec5SDimitry Andric
21380b57cec5SDimitry Andric// Constructs variants for different address spaces.
21390b57cec5SDimitry Andric// For now we only need variants for generic space pointers.
// Two-operand address-space layer: instantiates ATOM2N_impl exactly once,
// under the `_gen_` name fragment, with SpaceStr fixed to "gen". All other
// arguments are forwarded unchanged.
21400b57cec5SDimitry Andricmulticlass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
21415f757f3fSDimitry Andric                       string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
21420b57cec5SDimitry Andric                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
21430b57cec5SDimitry Andric   defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
21445f757f3fSDimitry Andric                            regT, regclass, ImmType, Imm, ImmTy, Preds>;
21450b57cec5SDimitry Andric}
// Address-space layer for three-operand scoped atomics.
// Forwards every argument to ATOM3N_impl with the address space fixed to
// "gen"; no other address-space variant is instantiated here.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen", regT,
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
21520b57cec5SDimitry Andric
// Scope layer for atomic ops: instantiates the "cta" and "sys" scoped
// variants, each additionally gated on hasAtomScope.
// The .gpu scope is the default and is already covered by the existing
// atomics that carry no explicit scope, so no variant is created for it.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Caller-supplied predicates plus the scope requirement.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
// Scope layer for three-operand atomics: instantiates the "cta" and "sys"
// scoped variants, each additionally gated on hasAtomScope.
// ".gpu"-scoped atomics are not defined; they do the same thing as the
// regular, non-scoped atomics defined elsewhere.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Caller-supplied predicates plus the scope requirement.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}
21780b57cec5SDimitry Andric
// atom.add
// Scoped atomic add for each supported integer and floating-point type.
// The second template argument of ATOM2S_impl selects the intrinsic family
// ("i" or "f"); the third is the type suffix forwarded down to the naming
// layer for the PTX mnemonic.
//
// The PTX ISA defines atomic add on the 16-bit float types only in the
// .noftz form (atom.add.noftz.f16 / atom.add.noftz.bf16), so the modifier
// is folded into the type suffix for those two variants. The record names
// (_f16 / _bf16) and the matched intrinsics are unchanged.
// NOTE(review): this relies on ATOM2N_impl using the type suffix only in
// the assembly string, as ATOM3N_impl does - confirm.
multiclass ATOM2_add_impl<string OpStr> {
   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32, []>;
   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64, []>;
   defm _bf16  : ATOM2S_impl<OpStr, "f", "noftz.bf16", bf16, Int16Regs, bf16imm, fpimm, bf16,
                            [hasSM<90>, hasPTX<78>]>;
   defm _f16  : ATOM2S_impl<OpStr, "f", "noftz.f16", f16, Int16Regs, f16imm, fpimm, f16,
                            [hasSM<70>, hasPTX<63>]>;
   defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm, f32,
                            []>;
   defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm, f64,
                            [hasAtomAddF64]>;
}
21930b57cec5SDimitry Andric
// atom.{and,or,xor}
// Scoped bitwise atomics on 32- and 64-bit values; the 64-bit variant is
// additionally gated on hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomBitwise64]>;
}
22000b57cec5SDimitry Andric
// atom.exch
// Scoped atomic exchange on 32- and 64-bit values, with no extra predicates.
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64,
                          []>;
}
22060b57cec5SDimitry Andric
// atom.{min,max}
// Scoped atomic min/max for signed and unsigned 32- and 64-bit integers.
// Both 64-bit variants are additionally gated on hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
}
22160b57cec5SDimitry Andric
// atom.{inc,dec}
// Scoped atomic increment/decrement; only the u32 form is instantiated.
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
}
22210b57cec5SDimitry Andric
// atom.cas
// Scoped compare-and-swap (three-operand form) on 32- and 64-bit values.
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          i64, Int64Regs, i64imm, imm, i64,
                          []>;
}
22270b57cec5SDimitry Andric
// Instantiate the scoped atomics, one defm per PTX operation. The string
// argument is the operation name used in both the mnemonic and the
// intrinsic record name.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl    <"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl    <"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl <"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl   <"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl <"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl <"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl <"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;
22380b57cec5SDimitry Andric
22390b57cec5SDimitry Andric//-----------------------------------
22400b57cec5SDimitry Andric// Support for ldu on sm_20 or later
22410b57cec5SDimitry Andric//-----------------------------------
22420b57cec5SDimitry Andric
22430b57cec5SDimitry Andric// Don't annotate ldu instructions as mayLoad, as they load from memory that is
22440b57cec5SDimitry Andric// read-only in a kernel.
22450b57cec5SDimitry Andric
22460b57cec5SDimitry Andric// Scalar
22470b57cec5SDimitry Andric
// Scalar ldu.global instructions, one per source-operand form:
// Int32Regs, Int64Regs, imemAny, MEMri and MEMri64.
// TyStr is appended verbatim after "ldu.global." and therefore carries
// both the type suffix and the operand text. None of the instructions has
// a selection pattern; all are gated on hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ldu.global." # TyStr;

  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         Asm, []>,
               Requires<[hasLDU]>;
}
22650b57cec5SDimitry Andric
// Scalar ldu.global instantiations. The i8 variant uses the 16-bit
// register class.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
22720b57cec5SDimitry Andric
22730b57cec5SDimitry Andric// vector
22740b57cec5SDimitry Andric
// Elementized vector ldu, two elements.
// Each instruction writes two registers of regclass ($dst1, $dst2) and
// differs only in the source-operand form. TyStr is appended verbatim
// after "ldu.global.". No selection patterns are attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ldu.global." # TyStr;

  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src), Asm, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src), Asm, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src), Asm, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src), Asm, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src), Asm, []>;
}
22930b57cec5SDimitry Andric
// Elementized vector ldu, four elements.
// Each instruction writes four registers of regclass ($dst1..$dst4) and
// differs only in the source-operand form. TyStr is appended verbatim
// after "ldu.global.". No selection patterns are attached.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ldu.global." # TyStr;

  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src), Asm, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src), Asm, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src), Asm, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src), Asm, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src), Asm, []>;
}
23110b57cec5SDimitry Andric
// Two-element vector ldu instantiations.
defm INT_PTX_LDU_G_v2i8_ELE :
  VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE :
  VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE :
  VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE :
  VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE :
  VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE :
  VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu instantiations. The f16 and f16x2 variants use
// the untyped .b16 / .b32 suffixes with integer register classes.
defm INT_PTX_LDU_G_v4i8_ELE :
  VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE :
  VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE :
  VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE :
  VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE :
  VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE :
  VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
23410b57cec5SDimitry Andric
23420b57cec5SDimitry Andric
23430b57cec5SDimitry Andric//-----------------------------------
23440b57cec5SDimitry Andric// Support for ldg on sm_35 or later
23450b57cec5SDimitry Andric//-----------------------------------
23460b57cec5SDimitry Andric
23470b57cec5SDimitry Andric// Don't annotate ld.global.nc as mayLoad, because these loads go through the
23480b57cec5SDimitry Andric// non-coherent texture cache, and therefore the values read must be read-only
23490b57cec5SDimitry Andric// during the lifetime of the kernel.
23500b57cec5SDimitry Andric
// Scalar ld.global.nc instructions, one per source-operand form:
// Int32Regs, Int64Regs, imemAny, MEMri and MEMri64.
// TyStr is appended verbatim after "ld.global.nc." and therefore carries
// both the type suffix and the operand text. None of the instructions has
// a selection pattern; all are gated on hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ld.global.nc." # TyStr;

  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         Asm, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         Asm, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         Asm, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         Asm, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         Asm, []>,
               Requires<[hasLDG]>;
}
23680b57cec5SDimitry Andric
// Scalar ld.global.nc instantiations. The i8 variant uses the 16-bit
// register class.
defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
23810b57cec5SDimitry Andric
23820b57cec5SDimitry Andric// vector
23830b57cec5SDimitry Andric
// Elementized vector ldg, two elements.
// Each instruction writes two registers of regclass ($dst1, $dst2) and
// differs only in the source-operand form. TyStr is appended verbatim
// after "ld.global.nc.". No selection patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ld.global.nc." # TyStr;

  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src), Asm, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src), Asm, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src), Asm, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src), Asm, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src), Asm, []>;
}
24020b57cec5SDimitry Andric
// Elementized vector ldg, four elements.
// Each instruction writes four registers of regclass ($dst1..$dst4) and
// differs only in the source-operand form. TyStr is appended verbatim
// after "ld.global.nc.". No selection patterns are attached.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar Asm = "ld.global.nc." # TyStr;

  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src), Asm, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src), Asm, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src), Asm, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src), Asm, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src), Asm, []>;
}
24200b57cec5SDimitry Andric
24210b57cec5SDimitry Andric// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ldg instantiations.
defm INT_PTX_LDG_G_v2i8_ELE :
  VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
  VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
  VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
  VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
  VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
  VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE :
  VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
  VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
  VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
  VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                Float32Regs>;
24420b57cec5SDimitry Andric
24430b57cec5SDimitry Andric
// cvta.<space>: address conversion selected for intrinsic Intrin.
// Three forms are produced:
//   ""     32-bit source, 32-bit result.
//   _64    64-bit source, 64-bit result.
//   _6432  32-bit source, 64-bit result; the source is first widened with
//          cvt.u64.u32 into a temporary, then converted with the 64-bit
//          cvta. Only this form is gated on the ShortPtr predicate.
multiclass NG_TO_G<string Str, Intrinsic Intrin, Predicate ShortPtr> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta." # Str # ".u32 \t$result, $src;",
                     [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta." # Str # ".u64 \t$result, $src;",
                      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  def _6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                        !strconcat("{{ .reg .b64 %tmp;\n\t",
                                   "  cvt.u64.u32 \t%tmp, $src;\n\t",
                                   "  cvta.", Str, ".u64 \t$result, %tmp; }}"),
                        [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
              Requires<[ShortPtr]>;
}
24580b57cec5SDimitry Andric
// cvta.to.<space>: address conversion selected for intrinsic Intrin.
// Three forms are produced:
//   ""     32-bit source, 32-bit result.
//   _64    64-bit source, 64-bit result.
//   _3264  64-bit source, 32-bit result; the 64-bit cvta.to result goes to
//          a temporary which is then narrowed with cvt.u32.u64. Only this
//          form is gated on the ShortPtr predicate.
multiclass G_TO_NG<string Str, Intrinsic Intrin, Predicate ShortPtr> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta.to." # Str # ".u32 \t$result, $src;",
                     [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;

  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta.to." # Str # ".u64 \t$result, $src;",
                      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;

  def _3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                        !strconcat("{{ .reg .b64 %tmp;\n\t",
                                   "  cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                                   "  cvt.u32.u64 \t$result, %tmp; }}"),
                        [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
              Requires<[ShortPtr]>;
}
24730b57cec5SDimitry Andric
// Specific address space -> generic. The last argument is the predicate
// for the mixed-width (_6432) form; global and param pass False for it.
defm cvta_local
  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen, useShortPtrLocal>;
defm cvta_shared
  : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen, useShortPtrShared>;
defm cvta_global
  : NG_TO_G<"global", int_nvvm_ptr_global_to_gen, False>;
defm cvta_const
  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen, useShortPtrConst>;
defm cvta_param
  : NG_TO_G<"param", int_nvvm_ptr_param_to_gen, False>;

// Generic -> specific address space. The last argument is the predicate
// for the mixed-width (_3264) form; global passes False for it.
defm cvta_to_local
  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local, useShortPtrLocal>;
defm cvta_to_shared
  : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared, useShortPtrShared>;
defm cvta_to_global
  : G_TO_NG<"global", int_nvvm_ptr_gen_to_global, False>;
defm cvta_to_const
  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant, useShortPtrConst>;
24840b57cec5SDimitry Andric
// nvvm.ptr.gen.to.param
// Selected as a plain register move (mov.u32 / mov.u64) for the 32- and
// 64-bit forms of the intrinsic.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;
24960b57cec5SDimitry Andric
24970b57cec5SDimitry Andric
// nvvm.move intrinsics
// Each int_nvvm_move_* intrinsic is selected as a single mov of the
// matching width and type. int_nvvm_move_ptr has a 32-bit and a 64-bit
// form.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;
25270b57cec5SDimitry Andric
25280b57cec5SDimitry Andric// @TODO: Are these actually needed, or will we always just see symbols
25290b57cec5SDimitry Andric// copied to registers first?
25300b57cec5SDimitry Andric/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
25310b57cec5SDimitry Andric                             "mov.u32 \t$r, $s;",
25320b57cec5SDimitry Andric                             [(set Int32Regs:$r,
25330b57cec5SDimitry Andric                             (int_nvvm_move_ptr texternalsym:$s))]>;
25340b57cec5SDimitry Andricdef nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
25350b57cec5SDimitry Andric                             "mov.u64 \t$r, $s;",
25360b57cec5SDimitry Andric                             [(set Int64Regs:$r,
25370b57cec5SDimitry Andric                             (int_nvvm_move_ptr texternalsym:$s))]>;*/
25380b57cec5SDimitry Andric
25390b57cec5SDimitry Andric
25400b57cec5SDimitry Andric// MoveParam        %r1, param
25410b57cec5SDimitry Andric// ptr_local_to_gen %r2, %r1
25420b57cec5SDimitry Andric// ptr_gen_to_local %r3, %r2
25430b57cec5SDimitry Andric// ->
25440b57cec5SDimitry Andric// mov %r1, param
25450b57cec5SDimitry Andric
25460b57cec5SDimitry Andric// @TODO: Revisit this.  There is a type
25470b57cec5SDimitry Andric// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
25480b57cec5SDimitry Andric// instructions are not currently defined. However, we can use the ptr
25490b57cec5SDimitry Andric// variants and the asm printer will do the right thing.
// Fold a local->generic->local round trip applied to a MoveParam of an
// external symbol into a single pointer move of that symbol (64-bit and
// 32-bit result forms).
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;
25560b57cec5SDimitry Andric
// 64-bit mov of an imem operand into a register. No selection pattern is
// attached to this instruction.
def texsurf_handles : NVPTXInst<(outs Int64Regs:$result),
                                (ins imem:$src),
                                "mov.u64 \t$result, $src;",
                                []>;
25600b57cec5SDimitry Andric
25610b57cec5SDimitry Andric//-----------------------------------
25620b57cec5SDimitry Andric// Compiler Error Warn
25630b57cec5SDimitry Andric// - Just ignore them in codegen
25640b57cec5SDimitry Andric//-----------------------------------
25650b57cec5SDimitry Andric
// Each record below matches the compiler.warn / compiler.error intrinsic for
// one operand width (32- or 64-bit register $a), has no outputs, and prints
// only a "//"-prefixed line naming the intrinsic; the operand $a does not
// appear in the asm string.
25660b57cec5SDimitry Andricdef INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
25670b57cec5SDimitry Andric                "// llvm.nvvm.compiler.warn()",
25680b57cec5SDimitry Andric                [(int_nvvm_compiler_warn Int32Regs:$a)]>;
25690b57cec5SDimitry Andricdef INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
25700b57cec5SDimitry Andric                "// llvm.nvvm.compiler.warn()",
25710b57cec5SDimitry Andric                [(int_nvvm_compiler_warn Int64Regs:$a)]>;
25720b57cec5SDimitry Andricdef INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a),
25730b57cec5SDimitry Andric                "// llvm.nvvm.compiler.error()",
25740b57cec5SDimitry Andric                [(int_nvvm_compiler_error Int32Regs:$a)]>;
25750b57cec5SDimitry Andricdef INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
25760b57cec5SDimitry Andric                "// llvm.nvvm.compiler.error()",
25770b57cec5SDimitry Andric                [(int_nvvm_compiler_error Int64Regs:$a)]>;
25780b57cec5SDimitry Andric
25790b57cec5SDimitry Andric
25800b57cec5SDimitry Andric// isspacep
25810b57cec5SDimitry Andric
// Defines the _32 and _64 forms of "isspacep.<suffix>".
//   suffix - state-space name appended to the "isspacep." mnemonic.
//   Intr   - intrinsic matched by both forms.
//   Preds  - predicates attached through Requires<>; defaults to none.
// Both forms write a predicate register $d; they differ only in whether the
// address operand $a is a 32-bit or a 64-bit register.
258206c3fb27SDimitry Andricmulticlass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
258306c3fb27SDimitry Andric  def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
258406c3fb27SDimitry Andric              "isspacep." # suffix # "\t$d, $a;",
258506c3fb27SDimitry Andric              [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
258606c3fb27SDimitry Andric    Requires<Preds>;
258706c3fb27SDimitry Andric  def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
258806c3fb27SDimitry Andric              "isspacep." # suffix # "\t$d, $a;",
258906c3fb27SDimitry Andric              [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
259006c3fb27SDimitry Andric    Requires<Preds>;
259106c3fb27SDimitry Andric}
25920b57cec5SDimitry Andric
// One ISSPACEP instantiation per state space. Only the const form (gated on
// hasPTX<31>) and the shared::cluster form (gated on hasPTX<78> and
// hasSM<90>) carry predicates; global, local and shared use the empty default.
259306c3fb27SDimitry Andricdefm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
259406c3fb27SDimitry Andricdefm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
259506c3fb27SDimitry Andricdefm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
259606c3fb27SDimitry Andricdefm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
259706c3fb27SDimitry Andricdefm isspace_shared_cluster : ISSPACEP<"shared::cluster",
259806c3fb27SDimitry Andric                                       int_nvvm_isspacep_shared_cluster,
259906c3fb27SDimitry Andric                                       [hasPTX<78>, hasSM<90>]>;
26000b57cec5SDimitry Andric
26010b57cec5SDimitry Andric// Special register reads
// Copies a SpecialRegs operand $r into the 32-bit register $d with mov.b32.
// It has no pattern of its own; the envreg Pat records that follow use it as
// their output instruction.
26020b57cec5SDimitry Andricdef MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
26030b57cec5SDimitry Andric                            (ins SpecialRegs:$r),
26040b57cec5SDimitry Andric                            "mov.b32 \t$d, $r;", []>;
26050b57cec5SDimitry Andric
// Map each int_nvvm_read_ptx_sreg_envregN intrinsic (N = 0..31) to a
// MOV_SPECIAL of the identically numbered ENVREGN register. The list is
// written out one record per register; keep the two indices on each line in
// sync when editing.
26060b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
26070b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
26080b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
26090b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
26100b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
26110b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
26120b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
26130b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
26140b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
26150b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
26160b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
26170b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
26180b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
26190b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
26200b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
26210b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
26220b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
26230b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
26240b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
26250b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
26260b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
26270b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
26280b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
26290b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
26300b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
26310b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
26320b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
26330b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
26340b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
26350b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
26360b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
26370b57cec5SDimitry Andricdef : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
26380b57cec5SDimitry Andric
26390b57cec5SDimitry Andric
26400b57cec5SDimitry Andric// rotate builtin support
26410b57cec5SDimitry Andric
// int_nvvm_rotate_b32 with an immediate amount, selected when hasHWROT32
// holds. Emits shf.l.wrap.b32 with $src supplied as both data operands.
26420b57cec5SDimitry Andricdef ROTATE_B32_HW_IMM
26430b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$dst),
26440b57cec5SDimitry Andric              (ins  Int32Regs:$src, i32imm:$amt),
26450b57cec5SDimitry Andric              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
26460b57cec5SDimitry Andric              [(set Int32Regs:$dst,
26470b57cec5SDimitry Andric                 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
26480b57cec5SDimitry Andric              Requires<[hasHWROT32]> ;
26490b57cec5SDimitry Andric
// Register-amount counterpart of ROTATE_B32_HW_IMM: same shf.l.wrap.b32
// encoding and hasHWROT32 requirement, with $amt taken from a 32-bit register.
26500b57cec5SDimitry Andricdef ROTATE_B32_HW_REG
26510b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$dst),
26520b57cec5SDimitry Andric              (ins  Int32Regs:$src, Int32Regs:$amt),
26530b57cec5SDimitry Andric              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
26540b57cec5SDimitry Andric              [(set Int32Regs:$dst,
26550b57cec5SDimitry Andric                 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
26560b57cec5SDimitry Andric              Requires<[hasHWROT32]> ;
26570b57cec5SDimitry Andric
// Fallbacks for int_nvvm_rotate_b32 when noHWROT32 holds.
// Immediate amount: ROT32imm_sw receives both $amt and SUB_FRM_32 of $amt
// (SUB_FRM_32 is defined outside this view; presumably 32 - amt -- TODO
// confirm).
26580b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
26590b57cec5SDimitry Andric          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
26600b57cec5SDimitry Andric      Requires<[noHWROT32]> ;
26610b57cec5SDimitry Andric
// Register amount: handled entirely by ROTL32reg_sw.
26620b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
26630b57cec5SDimitry Andric          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
26640b57cec5SDimitry Andric      Requires<[noHWROT32]> ;
26650b57cec5SDimitry Andric
// Extract one 32-bit half of a 64-bit register. Each def unpacks $src with a
// vector-destination mov.b64 inside a brace-delimited scope that declares a
// scratch register %dummy to receive the half that is not wanted. Neither
// def has a selection pattern; both are marked as having no side effects.
2666e8d8bef9SDimitry Andriclet hasSideEffects = false in {
  // $dst is the first element of the destination pair.
26670b57cec5SDimitry Andric  def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
26680b57cec5SDimitry Andric    !strconcat("{{\n\t",
26690b57cec5SDimitry Andric               ".reg .b32 %dummy;\n\t",
26700b57cec5SDimitry Andric               "mov.b64 \t{$dst,%dummy}, $src;\n\t",
26710b57cec5SDimitry Andric               "}}"),
26720b57cec5SDimitry Andric          []> ;
26730b57cec5SDimitry Andric
  // $dst is the second element of the destination pair.
26740b57cec5SDimitry Andric  def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
26750b57cec5SDimitry Andric    !strconcat("{{\n\t",
26760b57cec5SDimitry Andric               ".reg .b32 %dummy;\n\t",
26770b57cec5SDimitry Andric               "mov.b64 \t{%dummy,$dst}, $src;\n\t",
26780b57cec5SDimitry Andric               "}}"),
26790b57cec5SDimitry Andric          []> ;
26800b57cec5SDimitry Andric}
26810b57cec5SDimitry Andric
// Build a 64-bit register from two 32-bit registers with a vector-source
// mov.b64; $lo is the first element of the source pair and $hi the second.
// No selection pattern, no side effects.
2682e8d8bef9SDimitry Andriclet hasSideEffects = false in {
26830b57cec5SDimitry Andric  def PACK_TWO_INT32
26840b57cec5SDimitry Andric    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
26850b57cec5SDimitry Andric                "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
26860b57cec5SDimitry Andric}
26870b57cec5SDimitry Andric
// int_nvvm_swap_lo_hi_b64: repack $src with its halves exchanged, i.e. the
// old high half becomes PACK_TWO_INT32's $lo and the old low half its $hi.
26880b57cec5SDimitry Andricdef : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
26890b57cec5SDimitry Andric          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
26900b57cec5SDimitry Andric                          (GET_LO_INT64 Int64Regs:$src))> ;
26910b57cec5SDimitry Andric
26920b57cec5SDimitry Andric// Funnel shift, requires >= sm_32.  Does not trap if amt is out of range, so
26930b57cec5SDimitry Andric// no side effects.
// Four pattern-less funnel-shift instructions: left (shf.l) and right (shf.r)
// in wrap mode, each with an immediate or a register shift amount. All take
// separate $lo and $hi 32-bit inputs, require hasHWROT32, and are marked as
// having no side effects.
2694e8d8bef9SDimitry Andriclet hasSideEffects = false in {
  // Left funnel shift, immediate amount.
26950b57cec5SDimitry Andric  def SHF_L_WRAP_B32_IMM
26960b57cec5SDimitry Andric    : NVPTXInst<(outs Int32Regs:$dst),
26970b57cec5SDimitry Andric                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
26980b57cec5SDimitry Andric                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
26990b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27000b57cec5SDimitry Andric
  // Left funnel shift, register amount.
27010b57cec5SDimitry Andric  def SHF_L_WRAP_B32_REG
27020b57cec5SDimitry Andric    : NVPTXInst<(outs Int32Regs:$dst),
27030b57cec5SDimitry Andric                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
27040b57cec5SDimitry Andric                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
27050b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27060b57cec5SDimitry Andric
  // Right funnel shift, immediate amount.
27070b57cec5SDimitry Andric  def SHF_R_WRAP_B32_IMM
27080b57cec5SDimitry Andric    : NVPTXInst<(outs Int32Regs:$dst),
27090b57cec5SDimitry Andric                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
27100b57cec5SDimitry Andric                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
27110b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27120b57cec5SDimitry Andric
  // Right funnel shift, register amount.
27130b57cec5SDimitry Andric  def SHF_R_WRAP_B32_REG
27140b57cec5SDimitry Andric    : NVPTXInst<(outs Int32Regs:$dst),
27150b57cec5SDimitry Andric                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
27160b57cec5SDimitry Andric                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
27170b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27180b57cec5SDimitry Andric}
27190b57cec5SDimitry Andric
27200b57cec5SDimitry Andric// HW version of rotate 64
// int_nvvm_rotate_b64 when hasHWROT32 holds: split $src into halves, run two
// left funnel shifts with the halves in opposite orders, and repack.
// The first shift (inputs HI, LO) becomes the new low word; the second
// (inputs LO, HI) becomes the new high word.
// NOTE(review): both shifts use the same $amt, so the halves are never
// exchanged; whether amounts of 32 or more are expected here is not visible
// from this file -- confirm against the intrinsic's contract.
// Immediate shift amount.
27210b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
27220b57cec5SDimitry Andric          (PACK_TWO_INT32
27230b57cec5SDimitry Andric            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
27240b57cec5SDimitry Andric                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
27250b57cec5SDimitry Andric            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
27260b57cec5SDimitry Andric                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
27270b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27280b57cec5SDimitry Andric
// Register shift amount; same structure using the _REG funnel shift.
27290b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
27300b57cec5SDimitry Andric          (PACK_TWO_INT32
27310b57cec5SDimitry Andric            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
27320b57cec5SDimitry Andric                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
27330b57cec5SDimitry Andric            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
27340b57cec5SDimitry Andric                               (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
27350b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27360b57cec5SDimitry Andric
27370b57cec5SDimitry Andric
// int_nvvm_rotate_right_b64 when hasHWROT32 holds: mirror image of the
// rotate-left patterns, using right funnel shifts. The first shift (inputs
// LO, HI) becomes the new low word; the second (inputs HI, LO) becomes the
// new high word.
// NOTE(review): as with rotate-left, both shifts share one $amt; behaviour
// for amounts of 32 or more should be confirmed.
// Immediate shift amount.
27380b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
27390b57cec5SDimitry Andric          (PACK_TWO_INT32
27400b57cec5SDimitry Andric            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
27410b57cec5SDimitry Andric                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
27420b57cec5SDimitry Andric            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
27430b57cec5SDimitry Andric                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
27440b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27450b57cec5SDimitry Andric
// Register shift amount; same structure using the _REG funnel shift.
27460b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
27470b57cec5SDimitry Andric          (PACK_TWO_INT32
27480b57cec5SDimitry Andric            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
27490b57cec5SDimitry Andric                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
27500b57cec5SDimitry Andric            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
27510b57cec5SDimitry Andric                               (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
27520b57cec5SDimitry Andric      Requires<[hasHWROT32]>;
27530b57cec5SDimitry Andric
27540b57cec5SDimitry Andric// SW version of rotate 64
// Fallbacks for the 64-bit rotate intrinsics when noHWROT32 holds.
// Both immediate forms reuse ROT64imm_sw and differ only in operand order:
// rotate-left passes ($amt, SUB_FRM_64 $amt), rotate-right passes
// (SUB_FRM_64 $amt, $amt). SUB_FRM_64 is defined outside this view
// (presumably 64 - amt -- TODO confirm).
// The register forms go to ROTL64reg_sw and ROTR64reg_sw respectively.
27550b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
275681ad6265SDimitry Andric          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
27570b57cec5SDimitry Andric      Requires<[noHWROT32]>;
27580b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
27590b57cec5SDimitry Andric          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
27600b57cec5SDimitry Andric      Requires<[noHWROT32]>;
27610b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
27620b57cec5SDimitry Andric          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
27630b57cec5SDimitry Andric      Requires<[noHWROT32]>;
27640b57cec5SDimitry Andricdef : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
27650b57cec5SDimitry Andric          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
27660b57cec5SDimitry Andric      Requires<[noHWROT32]>;
27670b57cec5SDimitry Andric
27680b57cec5SDimitry Andric
27690b57cec5SDimitry Andric//-----------------------------------
27700b57cec5SDimitry Andric// Texture Intrinsics
27710b57cec5SDimitry Andric//-----------------------------------
27720b57cec5SDimitry Andric
27730b57cec5SDimitry Andric// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
27740b57cec5SDimitry Andric// also defined in NVPTXReplaceImageHandles.cpp
27750b57cec5SDimitry Andric
27760b57cec5SDimitry Andric// texmode_independent
2777e8d8bef9SDimitry Andriclet IsTex = true, IsTexModeUnified = false in {
27780b57cec5SDimitry Andric// Texture fetch instructions using handles
2779349cc55cSDimitry Andric
// Template for a 1D texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   texsamp - dag supplying the $t and $s operands (texture and sampler,
//             going by the parameter name); it is prepended to the
//             coordinate operand with !con.
// The pattern list is empty, so records built from this class are not
// selected by a TableGen pattern in this definition.
2780349cc55cSDimitry Andricclass TEX_1D_base<string inst, NVPTXRegClass outtype,
2781349cc55cSDimitry Andric                  NVPTXRegClass intype, dag texsamp>
2782349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2783349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2784349cc55cSDimitry Andric                 !con(texsamp, (ins intype:$x)),
2785349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
27860b57cec5SDimitry Andric                 []>;
27870b57cec5SDimitry Andric
// Instantiates TEX_1D_base four times, one per combination of register (R,
// Int64Regs) or immediate (I, i64imm) for $t and $s. The suffix letters are
// in operand order: first letter is $t, second is $s.
2788349cc55cSDimitry Andricmulticlass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2789349cc55cSDimitry Andric  def _RR : TEX_1D_base<inst, outtype, intype,
2790349cc55cSDimitry Andric                        (ins Int64Regs:$t, Int64Regs:$s)>;
2791349cc55cSDimitry Andric  def _RI : TEX_1D_base<inst, outtype, intype,
2792349cc55cSDimitry Andric                        (ins Int64Regs:$t, i64imm:$s)>;
2793349cc55cSDimitry Andric  def _IR : TEX_1D_base<inst, outtype, intype,
2794349cc55cSDimitry Andric                        (ins i64imm:$t, Int64Regs:$s)>;
2795349cc55cSDimitry Andric  def _II : TEX_1D_base<inst, outtype, intype,
2796349cc55cSDimitry Andric                        (ins i64imm:$t, i64imm:$s)>;
2797349cc55cSDimitry Andric}
2798349cc55cSDimitry Andric
// 1D fetch instantiations, named TEX_1D_<result type>_<coordinate type>.
// The s32 and u32 result forms both use Int32Regs; only the mnemonic differs.
2799349cc55cSDimitry Andricdefm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
2800349cc55cSDimitry Andricdefm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2801349cc55cSDimitry Andricdefm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
2802349cc55cSDimitry Andricdefm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2803349cc55cSDimitry Andricdefm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
2804349cc55cSDimitry Andricdefm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2805349cc55cSDimitry Andric
// Template for a 1D texture fetch with an explicit level operand.
// Same parameters as the plain 1D template; the input list gains $lod (of
// class intype), printed after the bracketed address. Empty pattern list.
2806349cc55cSDimitry Andricclass TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
2807349cc55cSDimitry Andric                        NVPTXRegClass intype, dag texsamp>
2808349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2809349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2810349cc55cSDimitry Andric                 !con(texsamp, (ins intype:$x, intype:$lod)),
2811349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
28120b57cec5SDimitry Andric                 []>;
2813349cc55cSDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_1D_LEVEL_base: first suffix letter is the
// kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2814349cc55cSDimitry Andricmulticlass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
2815349cc55cSDimitry Andric                        NVPTXRegClass intype> {
2816349cc55cSDimitry Andric  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
2817349cc55cSDimitry Andric                              (ins Int64Regs:$t, Int64Regs:$s)>;
2818349cc55cSDimitry Andric  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
2819349cc55cSDimitry Andric                              (ins Int64Regs:$t, i64imm:$s)>;
2820349cc55cSDimitry Andric  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
2821349cc55cSDimitry Andric                              (ins i64imm:$t, Int64Regs:$s)>;
2822349cc55cSDimitry Andric  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
2823349cc55cSDimitry Andric                              (ins i64imm:$t, i64imm:$s)>;
2824349cc55cSDimitry Andric}
2825349cc55cSDimitry Andric
// 1D level-fetch instantiations. Only Float32Regs coordinate forms are
// defined here, one per result type (f32, s32, u32).
2826349cc55cSDimitry Andricdefm TEX_1D_F32_F32_LEVEL :
2827349cc55cSDimitry Andric  TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2828349cc55cSDimitry Andricdefm TEX_1D_S32_F32_LEVEL :
2829349cc55cSDimitry Andric  TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2830349cc55cSDimitry Andricdefm TEX_1D_U32_F32_LEVEL :
2831349cc55cSDimitry Andric  TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2832349cc55cSDimitry Andric
// Template for a 1D texture fetch with explicit gradients.
// Same parameters as the plain 1D template; the input list gains $gradx and
// $grady (both of class intype), each printed as its own single-element
// brace group after the bracketed address. Empty pattern list.
2833349cc55cSDimitry Andricclass TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
2834349cc55cSDimitry Andric                       NVPTXRegClass intype, dag texsamp>
2835349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2836349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2837349cc55cSDimitry Andric                 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
2838349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
2839349cc55cSDimitry Andric                        " \\{$gradx\\}, \\{$grady\\};",
28400b57cec5SDimitry Andric                 []>;
2841349cc55cSDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_1D_GRAD_base: first suffix letter is the
// kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2842349cc55cSDimitry Andricmulticlass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
2843349cc55cSDimitry Andric                       NVPTXRegClass intype> {
2844349cc55cSDimitry Andric  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
2845349cc55cSDimitry Andric                             (ins Int64Regs:$t, Int64Regs:$s)>;
2846349cc55cSDimitry Andric  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
2847349cc55cSDimitry Andric                             (ins Int64Regs:$t, i64imm:$s)>;
2848349cc55cSDimitry Andric  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
2849349cc55cSDimitry Andric                             (ins i64imm:$t, Int64Regs:$s)>;
2850349cc55cSDimitry Andric  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
2851349cc55cSDimitry Andric                             (ins i64imm:$t, i64imm:$s)>;
2852349cc55cSDimitry Andric}
2853349cc55cSDimitry Andric
// 1D gradient-fetch instantiations. Only Float32Regs coordinate forms are
// defined here, one per result type (f32, s32, u32).
2854349cc55cSDimitry Andricdefm TEX_1D_F32_F32_GRAD
2855349cc55cSDimitry Andric  : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
2856349cc55cSDimitry Andricdefm TEX_1D_S32_F32_GRAD
2857349cc55cSDimitry Andric  : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
2858349cc55cSDimitry Andricdefm TEX_1D_U32_F32_GRAD
2859349cc55cSDimitry Andric  : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
2860349cc55cSDimitry Andric
// Template for a 1D array texture fetch.
// Same parameters as the plain 1D template. An extra operand $l is inserted
// ahead of $x; it is always Int32Regs regardless of intype and is printed
// first inside the coordinate brace group. Empty pattern list.
2861349cc55cSDimitry Andricclass TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
2862349cc55cSDimitry Andric                        NVPTXRegClass intype, dag texsamp>
2863349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2864349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2865349cc55cSDimitry Andric                 !con(texsamp, (ins Int32Regs:$l, intype:$x)),
2866349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
2867349cc55cSDimitry Andric                 []>;
2868349cc55cSDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_base: first suffix letter is the
// kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2869349cc55cSDimitry Andricmulticlass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
2870349cc55cSDimitry Andric                        NVPTXRegClass intype> {
2871349cc55cSDimitry Andric  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
2872349cc55cSDimitry Andric                              (ins Int64Regs:$t, Int64Regs:$s)>;
2873349cc55cSDimitry Andric  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
2874349cc55cSDimitry Andric                              (ins Int64Regs:$t, i64imm:$s)>;
2875349cc55cSDimitry Andric  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
2876349cc55cSDimitry Andric                              (ins i64imm:$t, Int64Regs:$s)>;
2877349cc55cSDimitry Andric  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
2878349cc55cSDimitry Andric                              (ins i64imm:$t, i64imm:$s)>;
2879349cc55cSDimitry Andric}
2880349cc55cSDimitry Andric
// 1D array fetch instantiations, named
// TEX_1D_ARRAY_<result type>_<coordinate type>. The s32 and u32 result forms
// both use Int32Regs; only the mnemonic differs.
2881349cc55cSDimitry Andricdefm TEX_1D_ARRAY_F32_F32
2882349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2883349cc55cSDimitry Andricdefm TEX_1D_ARRAY_F32_S32
2884349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
2885349cc55cSDimitry Andricdefm TEX_1D_ARRAY_S32_S32
2886349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
2887349cc55cSDimitry Andricdefm TEX_1D_ARRAY_S32_F32
2888349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2889349cc55cSDimitry Andricdefm TEX_1D_ARRAY_U32_S32
2890349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
2891349cc55cSDimitry Andricdefm TEX_1D_ARRAY_U32_F32
2892349cc55cSDimitry Andric  : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2893349cc55cSDimitry Andric
// Template for a 1D array texture fetch with an explicit level operand.
// Inputs after texsamp are $l (always Int32Regs), then $x and $lod (both of
// class intype); $lod is printed after the bracketed address. Empty pattern
// list.
2894349cc55cSDimitry Andricclass TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
2895349cc55cSDimitry Andric                              NVPTXRegClass intype, dag texsamp>
2896349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2897349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2898349cc55cSDimitry Andric                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
2899349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
29000b57cec5SDimitry Andric                        " [$t, $s, \\{$l, $x\\}], $lod;",
29010b57cec5SDimitry Andric                 []>;
2902349cc55cSDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_LEVEL_base: first suffix letter is
// the kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2903349cc55cSDimitry Andricmulticlass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
2904349cc55cSDimitry Andric                              NVPTXRegClass intype> {
2905349cc55cSDimitry Andric  def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2906349cc55cSDimitry Andric                                    (ins Int64Regs:$t, Int64Regs:$s)>;
2907349cc55cSDimitry Andric  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2908349cc55cSDimitry Andric                                    (ins Int64Regs:$t, i64imm:$s)>;
2909349cc55cSDimitry Andric  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2910349cc55cSDimitry Andric                                    (ins i64imm:$t, Int64Regs:$s)>;
2911349cc55cSDimitry Andric  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
2912349cc55cSDimitry Andric                                    (ins i64imm:$t, i64imm:$s)>;
2913349cc55cSDimitry Andric}
2914349cc55cSDimitry Andric
// 1D array level-fetch instantiations. Only Float32Regs coordinate forms are
// defined here, one per result type (f32, s32, u32).
2915349cc55cSDimitry Andricdefm TEX_1D_ARRAY_F32_F32_LEVEL
2916349cc55cSDimitry Andric  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2917349cc55cSDimitry Andricdefm TEX_1D_ARRAY_S32_F32_LEVEL
2918349cc55cSDimitry Andric  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2919349cc55cSDimitry Andricdefm TEX_1D_ARRAY_U32_F32_LEVEL
2920349cc55cSDimitry Andric  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2921349cc55cSDimitry Andric
// Template for a 1D array texture fetch with explicit gradients.
// Inputs after texsamp are $l (always Int32Regs), then $x, $gradx and $grady
// (all of class intype). Each gradient is printed as its own single-element
// brace group after the bracketed address. Empty pattern list.
2922349cc55cSDimitry Andricclass TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
2923349cc55cSDimitry Andric                             NVPTXRegClass intype, dag texsamp>
2924349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2925349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2926349cc55cSDimitry Andric                 !con(texsamp, (ins Int32Regs:$l, intype:$x,
2927349cc55cSDimitry Andric                                    intype:$gradx, intype:$grady)),
2928349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
2929349cc55cSDimitry Andric                        " \\{$gradx\\}, \\{$grady\\};",
29300b57cec5SDimitry Andric                 []>;
29310b57cec5SDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_1D_ARRAY_GRAD_base: first suffix letter is
// the kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2932349cc55cSDimitry Andricmulticlass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
2933349cc55cSDimitry Andric                             NVPTXRegClass intype> {
2934349cc55cSDimitry Andric  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2935349cc55cSDimitry Andric                                   (ins Int64Regs:$t, Int64Regs:$s)>;
2936349cc55cSDimitry Andric  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2937349cc55cSDimitry Andric                                   (ins Int64Regs:$t, i64imm:$s)>;
2938349cc55cSDimitry Andric  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2939349cc55cSDimitry Andric                                   (ins i64imm:$t, Int64Regs:$s)>;
2940349cc55cSDimitry Andric  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
2941349cc55cSDimitry Andric                                   (ins i64imm:$t, i64imm:$s)>;
2942349cc55cSDimitry Andric}
2943349cc55cSDimitry Andric
// 1D array gradient-fetch instantiations. Only Float32Regs coordinate forms
// are defined here, one per result type (f32, s32, u32).
2944349cc55cSDimitry Andricdefm TEX_1D_ARRAY_F32_F32_GRAD
2945349cc55cSDimitry Andric  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
2946349cc55cSDimitry Andricdefm TEX_1D_ARRAY_S32_F32_GRAD
2947349cc55cSDimitry Andric  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
2948349cc55cSDimitry Andricdefm TEX_1D_ARRAY_U32_F32_GRAD
2949349cc55cSDimitry Andric  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
2950349cc55cSDimitry Andric
// Template for a 2D texture fetch.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x and $y.
//   texsamp - dag supplying the $t and $s operands; it is prepended to the
//             coordinate operands with !con.
// Empty pattern list.
2951349cc55cSDimitry Andricclass TEX_2D_base<string inst, NVPTXRegClass outtype,
2952349cc55cSDimitry Andric                  NVPTXRegClass intype, dag texsamp>
2953349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2954349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2955349cc55cSDimitry Andric                 !con(texsamp, (ins intype:$x, intype:$y)),
2956349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
29570b57cec5SDimitry Andric                 []>;
29580b57cec5SDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_2D_base: first suffix letter is the kind of
// $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2959349cc55cSDimitry Andricmulticlass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
2960349cc55cSDimitry Andric  def _RR : TEX_2D_base<inst, outtype, intype,
2961349cc55cSDimitry Andric                        (ins Int64Regs:$t, Int64Regs:$s)>;
2962349cc55cSDimitry Andric  def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
2963349cc55cSDimitry Andric  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
2964349cc55cSDimitry Andric  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
2965349cc55cSDimitry Andric}
2966349cc55cSDimitry Andric
// 2D fetch instantiations, named TEX_2D_<result type>_<coordinate type>.
// The s32 and u32 result forms both use Int32Regs; only the mnemonic differs.
2967349cc55cSDimitry Andricdefm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2968349cc55cSDimitry Andricdefm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
2969349cc55cSDimitry Andricdefm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
2970349cc55cSDimitry Andricdefm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2971349cc55cSDimitry Andricdefm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
2972349cc55cSDimitry Andricdefm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
2973349cc55cSDimitry Andric
// Template for a 2D texture fetch with an explicit level operand.
// Same parameters as the plain 2D template; the input list gains $lod (of
// class intype), printed after the bracketed address. Empty pattern list.
2974349cc55cSDimitry Andricclass TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
2975349cc55cSDimitry Andric                        NVPTXRegClass intype, dag texsamp>
2976349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
2977349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
2978349cc55cSDimitry Andric                 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
2979349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
2980349cc55cSDimitry Andric                        " [$t, $s, \\{$x, $y\\}], $lod;",
29810b57cec5SDimitry Andric                 []>;
29820b57cec5SDimitry Andric
// _RR/_RI/_IR/_II variants of TEX_2D_LEVEL_base: first suffix letter is the
// kind of $t, second is the kind of $s (R = Int64Regs, I = i64imm).
2983349cc55cSDimitry Andricmulticlass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
2984349cc55cSDimitry Andric                        NVPTXRegClass intype> {
2985349cc55cSDimitry Andric  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
2986349cc55cSDimitry Andric                              (ins Int64Regs:$t, Int64Regs:$s)>;
2987349cc55cSDimitry Andric  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
2988349cc55cSDimitry Andric                              (ins Int64Regs:$t, i64imm:$s)>;
2989349cc55cSDimitry Andric  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
2990349cc55cSDimitry Andric                              (ins i64imm:$t, Int64Regs:$s)>;
2991349cc55cSDimitry Andric  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
2992349cc55cSDimitry Andric                              (ins i64imm:$t, i64imm:$s)>;
2993349cc55cSDimitry Andric}
2994349cc55cSDimitry Andric
// 2D level-fetch instantiations. Only Float32Regs coordinate forms are
// defined here, one per result type (f32, s32, u32).
2995349cc55cSDimitry Andricdefm TEX_2D_F32_F32_LEVEL :
2996349cc55cSDimitry Andric  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
2997349cc55cSDimitry Andricdefm TEX_2D_S32_F32_LEVEL :
2998349cc55cSDimitry Andric  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
2999349cc55cSDimitry Andricdefm TEX_2D_U32_F32_LEVEL :
3000349cc55cSDimitry Andric  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3001349cc55cSDimitry Andric
// Instruction template for the "grad" flavour of a 2D texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of $x, $y and of the four gradient operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// The gradients are printed as two brace-enclosed pairs after the address.
class TEX_2D_GRAD_base<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype,
                       dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$x, $y\\}],"
           # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
3012349cc55cSDimitry Andric
// Produces four TEX_2D_GRAD_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3024349cc55cSDimitry Andric
// tex.grad.2d instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3031349cc55cSDimitry Andric
// Instruction template for a 2D array texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x and $y operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $l is always an Int32Regs operand. $y is printed twice in the address
// vector (presumably to pad it to four elements - confirm against the PTX
// ISA before changing).
class TEX_2D_ARRAY_base<string inst,
                        NVPTXRegClass outtype,
                        NVPTXRegClass intype,
                        dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $y\\}];",
      []>;
3040349cc55cSDimitry Andric
// Produces four TEX_2D_ARRAY_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3052349cc55cSDimitry Andric
// tex.a2d instantiations. Record names follow <result>_<input>; both the
// s32 and u32 mnemonics are backed by Int32Regs.
defm TEX_2D_ARRAY_F32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32 :
  TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3065349cc55cSDimitry Andric
// Instruction template for the "level" flavour of a 2D array texture
// operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y and $lod operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $l is always an Int32Regs operand. $y is printed twice in the address
// vector (presumably padding to four elements - confirm against the PTX ISA).
class TEX_2D_ARRAY_LEVEL_base<string inst,
                              NVPTXRegClass outtype,
                              NVPTXRegClass intype,
                              dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
      []>;
3075349cc55cSDimitry Andric
// Produces four TEX_2D_ARRAY_LEVEL_base records covering every
// Int64Regs / i64imm pairing for $t (first suffix letter) and $s (second).
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3087349cc55cSDimitry Andric
// tex.level.a2d instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_2D_ARRAY_F32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL :
  TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3094349cc55cSDimitry Andric
// Instruction template for the "grad" flavour of a 2D array texture
// operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of $x, $y and of the four gradient operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $l is always an Int32Regs operand. $y is printed twice in the address
// vector (presumably padding to four elements - confirm against the PTX ISA).
class TEX_2D_ARRAY_GRAD_base<string inst,
                             NVPTXRegClass outtype,
                             NVPTXRegClass intype,
                             dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y,
                intype:$gradx0, intype:$gradx1,
                intype:$grady0, intype:$grady1)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$l, $x, $y, $y\\}],"
           # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
      []>;
3106349cc55cSDimitry Andric
// Produces four TEX_2D_ARRAY_GRAD_base records covering every
// Int64Regs / i64imm pairing for $t (first suffix letter) and $s (second).
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3118349cc55cSDimitry Andric
// tex.grad.a2d instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_2D_ARRAY_F32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD :
  TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3125349cc55cSDimitry Andric
// Instruction template for a 3D texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y and $z operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $z is printed twice in the address vector (presumably padding to four
// elements - confirm against the PTX ISA before changing).
class TEX_3D_base<string inst,
                  NVPTXRegClass outtype,
                  NVPTXRegClass intype,
                  dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
3134349cc55cSDimitry Andric
// Produces four TEX_3D_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3145349cc55cSDimitry Andric
// tex.3d instantiations. Record names follow <result>_<input>; both the
// s32 and u32 mnemonics are backed by Int32Regs.
defm TEX_3D_F32_F32
  : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32
  : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32
  : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32
  : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32
  : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32
  : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3152349cc55cSDimitry Andric
// Instruction template for the "level" flavour of a 3D texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y, $z and $lod operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $z is printed twice in the address vector (presumably padding to four
// elements - confirm against the PTX ISA before changing).
class TEX_3D_LEVEL_base<string inst,
                        NVPTXRegClass outtype,
                        NVPTXRegClass intype,
                        dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
3162349cc55cSDimitry Andric
// Produces four TEX_3D_LEVEL_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3174349cc55cSDimitry Andric
// tex.level.3d instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_3D_F32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL :
  TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3181349cc55cSDimitry Andric
// Instruction template for the "grad" flavour of a 3D texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of $x, $y, $z and of the six gradient operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $z, $gradx2 and $grady2 are each printed twice so that every
// brace-enclosed vector has four entries (presumably padding - confirm
// against the PTX ISA before changing).
class TEX_3D_GRAD_base<string inst,
                       NVPTXRegClass outtype,
                       NVPTXRegClass intype,
                       dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins intype:$x, intype:$y, intype:$z,
                intype:$gradx0, intype:$gradx1, intype:$gradx2,
                intype:$grady0, intype:$grady1, intype:$grady2)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$x, $y, $z, $z\\}],"
           # " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
           # " \\{$grady0, $grady1, $grady2, $grady2\\};",
      []>;
31950b57cec5SDimitry Andric
// Produces four TEX_3D_GRAD_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3207349cc55cSDimitry Andric
// tex.grad.3d instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_3D_F32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD :
  TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3214349cc55cSDimitry Andric
// Instruction template for a cube texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y and $z operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $z is printed twice in the address vector (presumably padding to four
// elements - confirm against the PTX ISA before changing).
class TEX_CUBE_base<string inst,
                    NVPTXRegClass outtype,
                    NVPTXRegClass intype,
                    dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y, $z, $z\\}];",
      []>;
3223349cc55cSDimitry Andric
// Produces four TEX_CUBE_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3234349cc55cSDimitry Andric
// tex.cube instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_CUBE_F32_F32 :
  TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32 :
  TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32 :
  TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3241349cc55cSDimitry Andric
// Instruction template for the "level" flavour of a cube texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y, $z and $lod operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $z is printed twice in the address vector (presumably padding to four
// elements - confirm against the PTX ISA before changing).
class TEX_CUBE_LEVEL_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
      []>;
32510b57cec5SDimitry Andric
// Produces four TEX_CUBE_LEVEL_base records covering every
// Int64Regs / i64imm pairing for $t (first suffix letter) and $s (second).
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3263349cc55cSDimitry Andric
// tex.level.cube instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_CUBE_F32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL :
  TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3270349cc55cSDimitry Andric
// Instruction template for a cube array texture operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y and $z operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $l is always an Int32Regs operand and is printed first in the address
// vector, followed by $x, $y and $z.
class TEX_CUBE_ARRAY_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x, $y, $z\\}];",
      []>;
3280349cc55cSDimitry Andric
// Produces four TEX_CUBE_ARRAY_base records covering every
// Int64Regs / i64imm pairing for $t (first suffix letter) and $s (second).
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3292349cc55cSDimitry Andric
// tex.acube instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_CUBE_ARRAY_F32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32 :
  TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3299349cc55cSDimitry Andric
// Instruction template for the "level" flavour of a cube array texture
// operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $y, $z and $lod operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// $l is always an Int32Regs operand and is printed first in the address
// vector; $lod follows the closing bracket.
class TEX_CUBE_ARRAY_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag texsamp>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp,
           (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
      []>;
33090b57cec5SDimitry Andric
// Produces four TEX_CUBE_ARRAY_LEVEL_base records covering every
// Int64Regs / i64imm pairing for $t (first suffix letter) and $s (second).
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3321349cc55cSDimitry Andric
// tex.level.acube instantiations. Inputs are always Float32Regs; results are
// Float32Regs for the f32 form and Int32Regs for both the s32 and u32 forms.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL :
  TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3331349cc55cSDimitry Andric
// Instruction template for a 2D tld4 operation.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($v0 .. $v3).
//   intype  - register class of the $x and $y operands.
//   texsamp - leading input operands; the assembly string expects them to
//             be named $t and $s.
// The selection pattern list is left empty.
class TLD4_2D_base<string inst,
                   NVPTXRegClass outtype,
                   NVPTXRegClass intype,
                   dag texsamp>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      inst # " \t\\{$v0, $v1, $v2, $v3\\},"
           # " [$t, $s, \\{$x, $y\\}];",
      []>;
3339349cc55cSDimitry Andric
// Produces four TLD4_2D_base records covering every Int64Regs / i64imm
// pairing for $t (first suffix letter) and $s (second suffix letter).
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}
3350349cc55cSDimitry Andric
// tld4.{r,g,b,a}.2d instantiations, grouped by result type. The letter
// after TLD4_ matches the .r/.g/.b/.a part of the mnemonic. Inputs are
// always Float32Regs.

// Float32Regs results.
defm TLD4_R_2D_F32_F32 :
  TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32 :
  TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32 :
  TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32 :
  TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// Int32Regs results, s32 mnemonics.
defm TLD4_R_2D_S32_F32 :
  TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32 :
  TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32 :
  TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32 :
  TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// Int32Regs results, u32 mnemonics.
defm TLD4_R_2D_U32_F32 :
  TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32 :
  TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32 :
  TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32 :
  TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3377349cc55cSDimitry Andric
33780b57cec5SDimitry Andric}
33790b57cec5SDimitry Andric
33800b57cec5SDimitry Andric
33810b57cec5SDimitry Andric// texmode_unified
3382e8d8bef9SDimitry Andriclet IsTex = true, IsTexModeUnified = true in {
33830b57cec5SDimitry Andric// Texture fetch instructions using handles
3384349cc55cSDimitry Andric
// Instruction template for a 1D texture fetch in unified texture mode.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x operand.
//   tex     - leading input operand; the assembly string expects it to be
//             named $t. No $s operand is used by this form.
// The selection pattern list is left empty.
class TEX_UNIFIED_1D_base<string inst,
                          NVPTXRegClass outtype,
                          NVPTXRegClass intype,
                          dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, \\{$x\\}];",
      []>;
3392349cc55cSDimitry Andric
// Produces two TEX_UNIFIED_1D_base records: _R takes $t in Int64Regs and
// _I takes $t as an i64imm.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3398349cc55cSDimitry Andric
// tex.1d instantiations for unified mode. Record names follow
// <result>_<input>; both the s32 and u32 mnemonics are backed by Int32Regs.
defm TEX_UNIFIED_1D_F32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32 :
  TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3411349cc55cSDimitry Andric
// Instruction template for the "level" flavour of a 1D texture fetch in
// unified texture mode.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x and $lod operands.
//   tex     - leading input operand; the assembly string expects it to be
//             named $t. No $s operand is used by this form.
class TEX_UNIFIED_1D_LEVEL_base<string inst,
                                NVPTXRegClass outtype,
                                NVPTXRegClass intype,
                                dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$lod)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, \\{$x\\}], $lod;",
      []>;
3419349cc55cSDimitry Andric
// Produces two TEX_UNIFIED_1D_LEVEL_base records: _R takes $t in Int64Regs
// and _I takes $t as an i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}
3425349cc55cSDimitry Andric
// tex.level.1d instantiations for unified mode. Inputs are always
// Float32Regs; results are Float32Regs for the f32 form and Int32Regs for
// both the s32 and u32 forms.
defm TEX_UNIFIED_1D_F32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL :
  TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3432349cc55cSDimitry Andric
// Instruction template for the "grad" flavour of a 1D texture fetch in
// unified texture mode.
//   inst    - mnemonic text placed at the front of the assembly string.
//   outtype - register class of the four results ($r, $g, $b, $a).
//   intype  - register class of the $x, $gradx and $grady operands.
//   tex     - leading input operand; the assembly string expects it to be
//             named $t. No $s operand is used by this form.
// Each gradient is printed as its own single-element brace-enclosed vector.
class TEX_UNIFIED_1D_GRAD_base<string inst,
                               NVPTXRegClass outtype,
                               NVPTXRegClass intype,
                               dag tex>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
      inst # " \t\\{$r, $g, $b, $a\\},"
           # " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
      []>;
34410b57cec5SDimitry Andric
// Defines the two TEX_UNIFIED_1D_GRAD variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3442349cc55cSDimitry Andricmulticlass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
3443349cc55cSDimitry Andric                               NVPTXRegClass intype> {
3444349cc55cSDimitry Andric  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3445349cc55cSDimitry Andric  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3446349cc55cSDimitry Andric}
3447349cc55cSDimitry Andric
// "tex.grad.1d" instantiations, one per result type (f32, s32, u32); all take
// Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3448349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_F32_F32_GRAD
3449349cc55cSDimitry Andric  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
3450349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_S32_F32_GRAD
3451349cc55cSDimitry Andric  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
3452349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_U32_F32_GRAD
3453349cc55cSDimitry Andric  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
3454349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_1D_ARRAY instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs, independent of intype, and is printed first in the
// coordinate vector (presumably the array layer index - confirm against the
// PTX ISA). The pattern list is empty.
3455349cc55cSDimitry Andricclass TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
3456349cc55cSDimitry Andric                                NVPTXRegClass intype, dag tex>
3457349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3458349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3459349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x)),
3460349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
34610b57cec5SDimitry Andric                 []>;
3462349cc55cSDimitry Andric
// Defines the two TEX_UNIFIED_1D_ARRAY variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3463349cc55cSDimitry Andricmulticlass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
3464349cc55cSDimitry Andric                                NVPTXRegClass intype> {
3465349cc55cSDimitry Andric  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3466349cc55cSDimitry Andric  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3467349cc55cSDimitry Andric}
3468349cc55cSDimitry Andric
// "tex.a1d" instantiations: every combination of result type (f32, s32, u32)
// and coordinate type (s32, f32). The s32 and u32 result forms both use
// Int32Regs, so only the mnemonic string distinguishes them.
3469349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_F32_S32
3470349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
3471349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_F32_F32
3472349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
3473349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_S32_S32
3474349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
3475349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_S32_F32
3476349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
3477349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_U32_S32
3478349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
3479349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_U32_F32
3480349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
3481349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_1D_ARRAY_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinate $x and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the coordinate vector; $lod
// is printed as a bare operand after it. The pattern list is empty.
3482349cc55cSDimitry Andricclass TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3483349cc55cSDimitry Andric                                      NVPTXRegClass intype, dag tex>
3484349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3485349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3486349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
3487349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
34880b57cec5SDimitry Andric                 []>;
3489349cc55cSDimitry Andric
// Defines the two TEX_UNIFIED_1D_ARRAY_LEVEL variants, which differ only in
// how the $t operand is supplied: `_R` takes it in an Int64Regs register,
// `_I` takes it as an i64imm immediate.
3490349cc55cSDimitry Andricmulticlass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3491349cc55cSDimitry Andric                                      NVPTXRegClass intype> {
3492349cc55cSDimitry Andric  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3493349cc55cSDimitry Andric                                           (ins Int64Regs:$t)>;
3494349cc55cSDimitry Andric  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
3495349cc55cSDimitry Andric                                           (ins i64imm:$t)>;
3496349cc55cSDimitry Andric}
3497349cc55cSDimitry Andric
// "tex.level.a1d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs for the intype operands. The s32 and u32 forms both use
// Int32Regs for the results, so only the mnemonic string distinguishes them.
3498349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
3499349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
3500349cc55cSDimitry Andric                               Float32Regs, Float32Regs>;
3501349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
3502349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
3503349cc55cSDimitry Andric                               Int32Regs, Float32Regs>;
3504349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
3505349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
3506349cc55cSDimitry Andric                               Int32Regs, Float32Regs>;
3507349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_1D_ARRAY_GRAD instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $gradx and $grady.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the coordinate vector;
// $gradx and $grady follow as one-element brace vectors. The pattern list is
// empty.
// NOTE(review): the second string literal starts with two spaces, so the
// emitted text has a double space before '['; this looks cosmetic only.
3508349cc55cSDimitry Andricclass TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3509349cc55cSDimitry Andric                                     NVPTXRegClass intype, dag tex>
3510349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3511349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3512349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x,
3513349cc55cSDimitry Andric                                intype:$gradx, intype:$grady)),
3514349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
35150b57cec5SDimitry Andric                        "  [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
35160b57cec5SDimitry Andric                 []>;
35170b57cec5SDimitry Andric
// Defines the two TEX_UNIFIED_1D_ARRAY_GRAD variants, which differ only in
// how the $t operand is supplied: `_R` takes it in an Int64Regs register,
// `_I` takes it as an i64imm immediate.
3518349cc55cSDimitry Andricmulticlass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3519349cc55cSDimitry Andric                                     NVPTXRegClass intype> {
3520349cc55cSDimitry Andric  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3521349cc55cSDimitry Andric                                          (ins Int64Regs:$t)>;
3522349cc55cSDimitry Andric  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
3523349cc55cSDimitry Andric                                          (ins i64imm:$t)>;
3524349cc55cSDimitry Andric}
3525349cc55cSDimitry Andric
// "tex.grad.a1d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs for the intype operands. The s32 and u32 forms both use
// Int32Regs for the results, so only the mnemonic string distinguishes them.
3526349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
3527349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
3528349cc55cSDimitry Andric                              Float32Regs, Float32Regs>;
3529349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
3530349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
3531349cc55cSDimitry Andric                              Int32Regs, Float32Regs>;
3532349cc55cSDimitry Andricdefm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
3533349cc55cSDimitry Andric  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
3534349cc55cSDimitry Andric                              Int32Regs, Float32Regs>;
3535349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x and $y.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// The pattern list is empty, so no selection pattern is attached here.
3536349cc55cSDimitry Andricclass TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3537349cc55cSDimitry Andric                          NVPTXRegClass intype, dag tex>
3538349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3539349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3540349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y)),
3541349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
35420b57cec5SDimitry Andric                 []>;
35430b57cec5SDimitry Andric
// Defines the two TEX_UNIFIED_2D variants, which differ only in how the $t
// operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes it
// as an i64imm immediate.
3544349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3545349cc55cSDimitry Andric                          NVPTXRegClass intype> {
3546349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3547349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3548349cc55cSDimitry Andric}
3549349cc55cSDimitry Andric
// "tex.2d" instantiations: every combination of result type (f32, s32, u32)
// and coordinate type (s32, f32). The s32 and u32 result forms both use
// Int32Regs, so only the mnemonic string distinguishes them.
3550349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_F32_S32
3551349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
3552349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_F32_F32
3553349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3554349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_S32_S32
3555349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
3556349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_S32_F32
3557349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3558349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_U32_S32
3559349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
3560349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_U32_F32
3561349cc55cSDimitry Andric  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3562349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $lod is printed as a bare operand after the coordinate vector. The pattern
// list is empty.
3563349cc55cSDimitry Andricclass TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
3564349cc55cSDimitry Andric                                NVPTXRegClass intype, dag tex>
3565349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3566349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3567349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
3568349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
35690b57cec5SDimitry Andric                 []>;
35700b57cec5SDimitry Andric
// Defines the two TEX_UNIFIED_2D_LEVEL variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3571349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
3572349cc55cSDimitry Andric                                NVPTXRegClass intype> {
3573349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3574349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3575349cc55cSDimitry Andric}
3576349cc55cSDimitry Andric
// "tex.level.2d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3577349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_F32_F32_LEVEL
3578349cc55cSDimitry Andric  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3579349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_S32_F32_LEVEL
3580349cc55cSDimitry Andric  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3581349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_U32_F32_LEVEL
3582349cc55cSDimitry Andric  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3583349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D_GRAD instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// After the coordinate vector the asm string prints two two-element brace
// vectors: {$gradx0, $gradx1} then {$grady0, $grady1}. The pattern list is
// empty.
3584349cc55cSDimitry Andricclass TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
3585349cc55cSDimitry Andric                               NVPTXRegClass intype, dag tex>
3586349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3587349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3588349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y,
3589349cc55cSDimitry Andric                                intype:$gradx0, intype:$gradx1,
3590349cc55cSDimitry Andric                                intype:$grady0, intype:$grady1)),
3591349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
3592349cc55cSDimitry Andric                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
35930b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_2D_GRAD variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3594349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
3595349cc55cSDimitry Andric                               NVPTXRegClass intype> {
3596349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3597349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3598349cc55cSDimitry Andric}
3599349cc55cSDimitry Andric
// "tex.grad.2d" instantiations, one per result type (f32, s32, u32); all take
// Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3600349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_F32_F32_GRAD
3601349cc55cSDimitry Andric  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3602349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_S32_F32_GRAD
3603349cc55cSDimitry Andric  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3604349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_U32_F32_GRAD
3605349cc55cSDimitry Andric  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3606349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D_ARRAY instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x and $y.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the coordinate vector. The
// pattern list is empty.
// NOTE(review): $y is printed twice so the vector has four elements;
// presumably the fourth element is ignored padding required by the PTX
// operand form - confirm against the PTX ISA before "fixing" it.
3607349cc55cSDimitry Andricclass TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
3608349cc55cSDimitry Andric                                NVPTXRegClass intype, dag tex>
3609349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3610349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3611349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
3612349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
36130b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_2D_ARRAY variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3614349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
3615349cc55cSDimitry Andric                                NVPTXRegClass intype> {
3616349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3617349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
3618349cc55cSDimitry Andric}
3619349cc55cSDimitry Andric
// "tex.a2d" instantiations: every combination of result type (f32, s32, u32)
// and coordinate type (s32, f32). The s32 and u32 result forms both use
// Int32Regs, so only the mnemonic string distinguishes them.
3620349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_F32_S32
3621349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
3622349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_F32_F32
3623349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
3624349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_S32_S32
3625349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
3626349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_S32_F32
3627349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
3628349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_U32_S32
3629349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
3630349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_U32_F32
3631349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
3632349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D_ARRAY_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the coordinate vector; $lod
// is printed as a bare operand after it. The pattern list is empty.
// NOTE(review): $y is printed twice so the vector has four elements;
// presumably the fourth element is ignored padding required by the PTX
// operand form - confirm against the PTX ISA. The second string literal also
// starts with two spaces, which looks cosmetic only.
3633349cc55cSDimitry Andricclass TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3634349cc55cSDimitry Andric                                      NVPTXRegClass intype, dag tex>
3635349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3636349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3637349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3638349cc55cSDimitry Andric                                intype:$lod)),
3639349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
3640349cc55cSDimitry Andric                        "  [$t, \\{$l, $x, $y, $y\\}], $lod;",
3641349cc55cSDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_2D_ARRAY_LEVEL variants, which differ only in
// how the $t operand is supplied: `_R` takes it in an Int64Regs register,
// `_I` takes it as an i64imm immediate.
3642349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3643349cc55cSDimitry Andric                                      NVPTXRegClass intype> {
3644349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3645349cc55cSDimitry Andric                                           (ins Int64Regs:$t)>;
3646349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
3647349cc55cSDimitry Andric                                           (ins i64imm:$t)>;
3648349cc55cSDimitry Andric}
3649349cc55cSDimitry Andric
// "tex.level.a2d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs for the intype operands. The s32 and u32 forms both use
// Int32Regs for the results, so only the mnemonic string distinguishes them.
3650349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
3651349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
3652349cc55cSDimitry Andric                               Float32Regs, Float32Regs>;
3653349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
3654349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
3655349cc55cSDimitry Andric                               Int32Regs, Float32Regs>;
3656349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
3657349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
3658349cc55cSDimitry Andric                               Int32Regs, Float32Regs>;
3659349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_2D_ARRAY_GRAD instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y and the four gradient operands.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the coordinate vector. The
// gradients follow as {$gradx0, $gradx1} and {$grady0, $grady1}. The pattern
// list is empty.
// NOTE(review): $y is printed twice so the coordinate vector has four
// elements; presumably the fourth element is ignored padding required by the
// PTX operand form - confirm against the PTX ISA.
3660349cc55cSDimitry Andricclass TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
3661349cc55cSDimitry Andric                                     NVPTXRegClass intype, dag tex>
3662349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3663349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3664349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
3665349cc55cSDimitry Andric                                intype:$gradx0, intype:$gradx1,
3666349cc55cSDimitry Andric                                intype:$grady0, intype:$grady1)),
3667349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
3668349cc55cSDimitry Andric                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
3669349cc55cSDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_2D_ARRAY_GRAD variants, which differ only in
// how the $t operand is supplied: `_R` takes it in an Int64Regs register,
// `_I` takes it as an i64imm immediate.
3670349cc55cSDimitry Andricmulticlass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
3671349cc55cSDimitry Andric                                     NVPTXRegClass intype> {
3672349cc55cSDimitry Andric  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3673349cc55cSDimitry Andric                                          (ins Int64Regs:$t)>;
3674349cc55cSDimitry Andric  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
3675349cc55cSDimitry Andric                                          (ins i64imm:$t)>;
3676349cc55cSDimitry Andric}
3677349cc55cSDimitry Andric
// "tex.grad.a2d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs for the intype operands. The s32 and u32 forms both use
// Int32Regs for the results, so only the mnemonic string distinguishes them.
3678349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
3679349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
3680349cc55cSDimitry Andric                              Float32Regs, Float32Regs>;
3681349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
3682349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
3683349cc55cSDimitry Andric                              Int32Regs, Float32Regs>;
3684349cc55cSDimitry Andricdefm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
3685349cc55cSDimitry Andric  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
3686349cc55cSDimitry Andric                              Int32Regs, Float32Regs>;
3687349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_3D instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// The pattern list is empty.
// NOTE(review): $z is printed twice so the coordinate vector has four
// elements; presumably the fourth element is ignored padding required by the
// PTX operand form - confirm against the PTX ISA before "fixing" it.
3688349cc55cSDimitry Andricclass TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
3689349cc55cSDimitry Andric                          NVPTXRegClass intype, dag tex>
3690349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3691349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3692349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3693349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
3694349cc55cSDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_3D variants, which differ only in how the $t
// operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes it
// as an i64imm immediate.
3695349cc55cSDimitry Andricmulticlass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
3696349cc55cSDimitry Andric                          NVPTXRegClass intype> {
3697349cc55cSDimitry Andric  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3698349cc55cSDimitry Andric  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
3699349cc55cSDimitry Andric}
3700349cc55cSDimitry Andric
// "tex.3d" instantiations: every combination of result type (f32, s32, u32)
// and coordinate type (s32, f32). The s32 and u32 result forms both use
// Int32Regs, so only the mnemonic string distinguishes them.
3701349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_F32_S32
3702349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
3703349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_F32_F32
3704349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3705349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_S32_S32
3706349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
3707349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_S32_F32
3708349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3709349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_U32_S32
3710349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
3711349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_U32_F32
3712349cc55cSDimitry Andric  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3713349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_3D_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $lod is printed as a bare operand after the coordinate vector. The pattern
// list is empty.
// NOTE(review): $z is printed twice so the coordinate vector has four
// elements; presumably the fourth element is ignored padding required by the
// PTX operand form - confirm against the PTX ISA.
3714349cc55cSDimitry Andricclass TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
3715349cc55cSDimitry Andric                                NVPTXRegClass intype, dag tex>
3716349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3717349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3718349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3719349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
37200b57cec5SDimitry Andric                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
37210b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_3D_LEVEL variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3722349cc55cSDimitry Andricmulticlass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
3723349cc55cSDimitry Andric                                NVPTXRegClass intype> {
3724349cc55cSDimitry Andric  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3725349cc55cSDimitry Andric  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
3726349cc55cSDimitry Andric}
3727349cc55cSDimitry Andric
// "tex.level.3d" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3728349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_F32_F32_LEVEL
3729349cc55cSDimitry Andric  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3730349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_S32_F32_LEVEL
3731349cc55cSDimitry Andric  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3732349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_U32_F32_LEVEL
3733349cc55cSDimitry Andric  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
3734349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_3D_GRAD instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient operands.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// Input order is $gradx0..2 followed by $grady0..2. The pattern list is
// empty.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so all three
// brace vectors have four elements; presumably the fourth element is ignored
// padding required by the PTX operand form - confirm against the PTX ISA.
3735349cc55cSDimitry Andricclass TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
3736349cc55cSDimitry Andric                               NVPTXRegClass intype, dag tex>
3737349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3738349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3739349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z,
3740349cc55cSDimitry Andric                                intype:$gradx0, intype:$gradx1,
3741349cc55cSDimitry Andric                                intype:$gradx2, intype:$grady0,
3742349cc55cSDimitry Andric                                intype:$grady1, intype:$grady2)),
3743349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
37440b57cec5SDimitry Andric                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
37450b57cec5SDimitry Andric                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
37460b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_3D_GRAD variants, which differ only in how the
// $t operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes
// it as an i64imm immediate.
3747349cc55cSDimitry Andricmulticlass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
3748349cc55cSDimitry Andric                               NVPTXRegClass intype> {
3749349cc55cSDimitry Andric  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3750349cc55cSDimitry Andric  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
3751349cc55cSDimitry Andric}
37520b57cec5SDimitry Andric
// "tex.grad.3d" instantiations, one per result type (f32, s32, u32); all take
// Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3753349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_F32_F32_GRAD
3754349cc55cSDimitry Andric  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
3755349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_S32_F32_GRAD
3756349cc55cSDimitry Andric  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
3757349cc55cSDimitry Andricdefm TEX_UNIFIED_3D_U32_F32_GRAD
3758349cc55cSDimitry Andric  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
37590b57cec5SDimitry Andric
// Base record for the TEX_UNIFIED_CUBE instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// The pattern list is empty.
// NOTE(review): $z is printed twice so the coordinate vector has four
// elements; presumably the fourth element is ignored padding required by the
// PTX operand form - confirm against the PTX ISA before "fixing" it.
3760349cc55cSDimitry Andricclass TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
3761349cc55cSDimitry Andric                            NVPTXRegClass intype, dag tex>
3762349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3763349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3764349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
3765349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
37660b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_CUBE variants, which differ only in how the $t
// operand is supplied: `_R` takes it in an Int64Regs register, `_I` takes it
// as an i64imm immediate.
3767349cc55cSDimitry Andricmulticlass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
3768349cc55cSDimitry Andric                            NVPTXRegClass intype> {
3769349cc55cSDimitry Andric  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3770349cc55cSDimitry Andric  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
3771349cc55cSDimitry Andric}
3772349cc55cSDimitry Andric
// "tex.cube" instantiations, one per result type (f32, s32, u32); all take
// Float32Regs coordinates. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3773349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_F32_F32
3774349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
3775349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_S32_F32
3776349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
3777349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_U32_F32
3778349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
3779349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_CUBE_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $lod is printed as a bare operand after the coordinate vector. The pattern
// list is empty.
// NOTE(review): $z is printed twice so the coordinate vector has four
// elements; presumably the fourth element is ignored padding required by the
// PTX operand form - confirm against the PTX ISA.
3780349cc55cSDimitry Andricclass TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
3781349cc55cSDimitry Andric                                  NVPTXRegClass intype, dag tex>
3782349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3783349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3784349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
3785349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
3786349cc55cSDimitry Andric                        " [$t, \\{$x, $y, $z, $z\\}], $lod;",
3787349cc55cSDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_CUBE_LEVEL variants, which differ only in how
// the $t operand is supplied: `_R` takes it in an Int64Regs register, `_I`
// takes it as an i64imm immediate.
3788349cc55cSDimitry Andricmulticlass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
3789349cc55cSDimitry Andric                                  NVPTXRegClass intype> {
3790349cc55cSDimitry Andric  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3791349cc55cSDimitry Andric                                       (ins Int64Regs:$t)>;
3792349cc55cSDimitry Andric  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
3793349cc55cSDimitry Andric                                       (ins i64imm:$t)>;
3794349cc55cSDimitry Andric}
3795349cc55cSDimitry Andric
// "tex.level.cube" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs inputs. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3796349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_F32_F32_LEVEL
3797349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
3798349cc55cSDimitry Andric                           Float32Regs, Float32Regs>;
3799349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_S32_F32_LEVEL
3800349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
3801349cc55cSDimitry Andric                           Int32Regs, Float32Regs>;
3802349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_U32_F32_LEVEL
3803349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
3804349cc55cSDimitry Andric                           Int32Regs, Float32Regs>;
3805349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_CUBE_ARRAY instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y and $z.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first; together with $x, $y, $z it
// already fills a four-element coordinate vector, so no operand is repeated
// here. The pattern list is empty.
3806349cc55cSDimitry Andricclass TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
3807349cc55cSDimitry Andric                                  NVPTXRegClass intype, dag tex>
3808349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3809349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3810349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
3811349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
3812349cc55cSDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_CUBE_ARRAY variants, which differ only in how
// the $t operand is supplied: `_R` takes it in an Int64Regs register, `_I`
// takes it as an i64imm immediate.
3813349cc55cSDimitry Andricmulticlass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
3814349cc55cSDimitry Andric                                  NVPTXRegClass intype> {
3815349cc55cSDimitry Andric  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3816349cc55cSDimitry Andric                                       (ins Int64Regs:$t)>;
3817349cc55cSDimitry Andric  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
3818349cc55cSDimitry Andric                                       (ins i64imm:$t)>;
3819349cc55cSDimitry Andric}
3820349cc55cSDimitry Andric
// "tex.acube" instantiations, one per result type (f32, s32, u32); all take
// Float32Regs coordinates. The s32 and u32 forms both use Int32Regs for the
// results, so only the mnemonic string distinguishes them.
3821349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_F32_F32
3822349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
3823349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_S32_F32
3824349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
3825349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_U32_F32
3826349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
3827349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_CUBE_ARRAY_LEVEL instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of the coordinates $x, $y, $z and of $lod.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// $l is always Int32Regs and is printed first in the four-element coordinate
// vector; $lod is printed as a bare operand after it. The pattern list is
// empty.
3828349cc55cSDimitry Andricclass TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
3829349cc55cSDimitry Andric                                        NVPTXRegClass intype, dag tex>
3830349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
3831349cc55cSDimitry Andric                      outtype:$b, outtype:$a),
3832349cc55cSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
3833349cc55cSDimitry Andric                                intype:$lod)),
3834349cc55cSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\},"
38350b57cec5SDimitry Andric                        " [$t, \\{$l, $x, $y, $z\\}], $lod;",
38360b57cec5SDimitry Andric                 []>;
// Defines the two TEX_UNIFIED_CUBE_ARRAY_LEVEL variants, which differ only in
// how the $t operand is supplied: `_R` takes it in an Int64Regs register,
// `_I` takes it as an i64imm immediate.
3837349cc55cSDimitry Andricmulticlass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
3838349cc55cSDimitry Andric                                        NVPTXRegClass intype> {
3839349cc55cSDimitry Andric  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3840349cc55cSDimitry Andric                                             (ins Int64Regs:$t)>;
3841349cc55cSDimitry Andric  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
3842349cc55cSDimitry Andric                                             (ins i64imm:$t)>;
3843349cc55cSDimitry Andric}
38440b57cec5SDimitry Andric
// "tex.level.acube" instantiations, one per result type (f32, s32, u32); all
// take Float32Regs for the intype operands. The s32 and u32 forms both use
// Int32Regs for the results, so only the mnemonic string distinguishes them.
3845349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
3846349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
3847349cc55cSDimitry Andric                                 Float32Regs, Float32Regs>;
3848349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
3849349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
3850349cc55cSDimitry Andric                                 Int32Regs, Float32Regs>;
3851349cc55cSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
3852349cc55cSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
3853349cc55cSDimitry Andric                                 Int32Regs, Float32Regs>;
3854349cc55cSDimitry Andric
// Base record for the TEX_UNIFIED_CUBE_GRAD instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient operands.
//   tex     - dag prepended to the inputs via !con; it must define the $t
//             operand that the asm string prints inside "[$t, ...]".
// Input order is $gradx0..2 followed by $grady0..2. The pattern list is
// empty.
// NOTE(review): $z, $gradx2 and $grady2 are each printed twice so all three
// brace vectors have four elements; presumably the fourth element is ignored
// padding required by the PTX operand form - confirm against the PTX ISA.
38557a6dacacSDimitry Andricclass TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype,
38567a6dacacSDimitry Andric                                 NVPTXRegClass intype, dag tex>
38577a6dacacSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
38587a6dacacSDimitry Andric                      outtype:$b, outtype:$a),
38597a6dacacSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y, intype:$z,
38607a6dacacSDimitry Andric                                intype:$gradx0, intype:$gradx1,
38617a6dacacSDimitry Andric                                intype:$gradx2, intype:$grady0,
38627a6dacacSDimitry Andric                                intype:$grady1, intype:$grady2)),
38637a6dacacSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
38647a6dacacSDimitry Andric                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
38657a6dacacSDimitry Andric                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
38667a6dacacSDimitry Andric                 []>;
38677a6dacacSDimitry Andric
// Emits the two texture-operand variants of TEX_UNIFIED_CUBE_GRAD_base:
//   _R : $t is an Int64Regs operand.
//   _I : $t is an i64imm operand.
38687a6dacacSDimitry Andricmulticlass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype,
38697a6dacacSDimitry Andric                                 NVPTXRegClass intype> {
38707a6dacacSDimitry Andric  def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
38717a6dacacSDimitry Andric  def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
38727a6dacacSDimitry Andric}
38737a6dacacSDimitry Andric
// Instantiations of TEX_UNIFIED_CUBE_GRAD for the "tex.grad.cube.v4" mnemonics
// with .f32, .s32 and .u32 result suffixes. Inputs are always Float32Regs;
// the .s32 and .u32 variants both use Int32Regs for the outputs.
38747a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_F32_F32_GRAD
38757a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>;
38767a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_S32_F32_GRAD
38777a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>;
38787a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_U32_F32_GRAD
38797a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>;
38807a6dacacSDimitry Andric
// Base instruction record for the unified-mode cube-array "grad" texture
// fetches. Same shape as the non-array cube grad record, with one extra
// Int32Regs input $l placed before the coordinates.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   intype  - register class of $x, $y, $z and the six gradient operands.
//   tex     - (ins ...) dag providing the texture operand $t, prepended with
//             !con.
// The coordinate group prints {$l, $x, $y, $z}; the two gradient groups have
// only three operands each, so $gradx2 / $grady2 are printed twice to fill the
// fourth slot. The selection pattern list is empty ([]).
38817a6dacacSDimitry Andricclass TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
38827a6dacacSDimitry Andric                                       NVPTXRegClass intype, dag tex>
38837a6dacacSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g,
38847a6dacacSDimitry Andric                      outtype:$b, outtype:$a),
38857a6dacacSDimitry Andric                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
38867a6dacacSDimitry Andric                                intype:$gradx0, intype:$gradx1,
38877a6dacacSDimitry Andric                                intype:$gradx2, intype:$grady0,
38887a6dacacSDimitry Andric                                intype:$grady1, intype:$grady2)),
38897a6dacacSDimitry Andric                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
38907a6dacacSDimitry Andric                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
38917a6dacacSDimitry Andric                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
38927a6dacacSDimitry Andric                 []>;
// Emits the two texture-operand variants of TEX_UNIFIED_CUBE_ARRAY_GRAD_base:
//   _R : $t is an Int64Regs operand.
//   _I : $t is an i64imm operand.
38937a6dacacSDimitry Andricmulticlass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
38947a6dacacSDimitry Andric                                       NVPTXRegClass intype> {
38957a6dacacSDimitry Andric  def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
38967a6dacacSDimitry Andric                                            (ins Int64Regs:$t)>;
38977a6dacacSDimitry Andric  def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
38987a6dacacSDimitry Andric                                            (ins i64imm:$t)>;
38997a6dacacSDimitry Andric}
39007a6dacacSDimitry Andric
// Instantiations of TEX_UNIFIED_CUBE_ARRAY_GRAD for the "tex.grad.acube.v4"
// mnemonics with .f32, .s32 and .u32 result suffixes. Inputs are always
// Float32Regs; the .s32 and .u32 variants both use Int32Regs for the outputs.
39017a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
39027a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
39037a6dacacSDimitry Andric                                Float32Regs, Float32Regs>;
39047a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
39057a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
39067a6dacacSDimitry Andric                                Int32Regs, Float32Regs>;
39077a6dacacSDimitry Andricdefm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
39087a6dacacSDimitry Andric  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
39097a6dacacSDimitry Andric                                Int32Regs, Float32Regs>;
39107a6dacacSDimitry Andric
// Base instruction record for the unified-mode 2D tld4 instructions.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of the coordinates $x, $y.
//   tex     - (ins ...) dag providing the texture operand $t, prepended to the
//             coordinate inputs with !con.
// The selection pattern list is empty ([]).
3911349cc55cSDimitry Andricclass TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
3912349cc55cSDimitry Andric                           NVPTXRegClass intype, dag tex>
3913349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
3914349cc55cSDimitry Andric                      outtype:$v2, outtype:$v3),
3915349cc55cSDimitry Andric                 !con(tex, (ins intype:$x, intype:$y)),
3916349cc55cSDimitry Andric                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
39170b57cec5SDimitry Andric                 []>;
// Emits the two texture-operand variants of TLD4_UNIFIED_2D_base:
//   _R : $t is an Int64Regs operand.
//   _I : $t is an i64imm operand.
3918349cc55cSDimitry Andricmulticlass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
3919349cc55cSDimitry Andric                           NVPTXRegClass intype> {
3920349cc55cSDimitry Andric  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
3921349cc55cSDimitry Andric  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
3922349cc55cSDimitry Andric}
3923349cc55cSDimitry Andric
// Instantiations of TLD4_UNIFIED_2D: one per component selector in the
// mnemonic (.r, .g, .b, .a) for each of the .f32, .s32 and .u32 result
// suffixes - twelve defm lines in total. Inputs are always Float32Regs; the
// .s32 and .u32 groups both use Int32Regs for the outputs.
3924349cc55cSDimitry Andricdefm TLD4_UNIFIED_R_2D_F32_F32
3925349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3926349cc55cSDimitry Andricdefm TLD4_UNIFIED_G_2D_F32_F32
3927349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3928349cc55cSDimitry Andricdefm TLD4_UNIFIED_B_2D_F32_F32
3929349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3930349cc55cSDimitry Andricdefm TLD4_UNIFIED_A_2D_F32_F32
3931349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
3932349cc55cSDimitry Andric
3933349cc55cSDimitry Andricdefm TLD4_UNIFIED_R_2D_S32_F32
3934349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3935349cc55cSDimitry Andricdefm TLD4_UNIFIED_G_2D_S32_F32
3936349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3937349cc55cSDimitry Andricdefm TLD4_UNIFIED_B_2D_S32_F32
3938349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3939349cc55cSDimitry Andricdefm TLD4_UNIFIED_A_2D_S32_F32
3940349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
3941349cc55cSDimitry Andric
3942349cc55cSDimitry Andricdefm TLD4_UNIFIED_R_2D_U32_F32
3943349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3944349cc55cSDimitry Andricdefm TLD4_UNIFIED_G_2D_U32_F32
3945349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3946349cc55cSDimitry Andricdefm TLD4_UNIFIED_B_2D_U32_F32
3947349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3948349cc55cSDimitry Andricdefm TLD4_UNIFIED_A_2D_U32_F32
3949349cc55cSDimitry Andric  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
3950349cc55cSDimitry Andric
39510b57cec5SDimitry Andric}
39520b57cec5SDimitry Andric
39530b57cec5SDimitry Andric
39540b57cec5SDimitry Andric
39550b57cec5SDimitry Andric//=== Surface load instructions
3956349cc55cSDimitry Andric
3957e8d8bef9SDimitry Andriclet IsSuld = true in {
39580b57cec5SDimitry Andric
// Base instruction record for single-result 1D surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the single result $r.
//   surf    - (ins ...) dag providing the surface operand $s, prepended to the
//             Int32Regs coordinate $x with !con.
// The selection pattern list is empty ([]).
// NOTE(review): this record sits inside the enclosing `let IsSuld = true`
// scope, whereas the sibling scopes below use the integers 2 and 3.
// `true` is presumably equivalent to 1 here - confirm against the IsSuld
// field declaration.
3959349cc55cSDimitry Andricclass SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
3960349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r),
3961349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x)),
3962349cc55cSDimitry Andric                inst # " \\{$r\\}, [$s, \\{$x\\}];",
39630b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
3964349cc55cSDimitry Andricmulticlass SULD_1D<string inst, NVPTXRegClass outtype> {
3965349cc55cSDimitry Andric  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
3966349cc55cSDimitry Andric  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
3967349cc55cSDimitry Andric}
39680b57cec5SDimitry Andric
// Instantiations of SULD_1D for the "suld.b.1d" mnemonics: element suffixes
// .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the result.
3969349cc55cSDimitry Andricdefm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
3970349cc55cSDimitry Andricdefm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
3971349cc55cSDimitry Andricdefm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
3972349cc55cSDimitry Andricdefm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
39730b57cec5SDimitry Andric
3974349cc55cSDimitry Andricdefm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
3975349cc55cSDimitry Andricdefm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
3976349cc55cSDimitry Andricdefm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
3977349cc55cSDimitry Andricdefm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
39780b57cec5SDimitry Andric
3979349cc55cSDimitry Andricdefm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
3980349cc55cSDimitry Andricdefm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
3981349cc55cSDimitry Andricdefm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
3982349cc55cSDimitry Andricdefm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
3983349cc55cSDimitry Andric
// Base instruction record for single-result 1D-array surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the single result $r.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs inputs $l and $x.
// The address group prints {$l, $x}. The selection pattern list is empty ([]).
3984349cc55cSDimitry Andricclass SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
3985349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r),
3986349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
3987349cc55cSDimitry Andric                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
39880b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_ARRAY_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
3989349cc55cSDimitry Andricmulticlass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
3990349cc55cSDimitry Andric  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
3991349cc55cSDimitry Andric  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
3992349cc55cSDimitry Andric}
3993349cc55cSDimitry Andric
// Instantiations of SULD_1D_ARRAY for the "suld.b.a1d" mnemonics: element
// suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the result.
3994349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I8_CLAMP
3995349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
3996349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I16_CLAMP
3997349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
3998349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I32_CLAMP
3999349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
4000349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I64_CLAMP
4001349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
4002349cc55cSDimitry Andric
4003349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I8_TRAP
4004349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
4005349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I16_TRAP
4006349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
4007349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I32_TRAP
4008349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
4009349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I64_TRAP
4010349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
4011349cc55cSDimitry Andric
4012349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I8_ZERO
4013349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
4014349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I16_ZERO
4015349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
4016349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I32_ZERO
4017349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
4018349cc55cSDimitry Andricdefm SULD_1D_ARRAY_I64_ZERO
4019349cc55cSDimitry Andric  : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
4020349cc55cSDimitry Andric
// Base instruction record for single-result 2D surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the single result $r.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs coordinates $x and $y.
// The selection pattern list is empty ([]).
4021349cc55cSDimitry Andricclass SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
4022349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r),
4023349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4024349cc55cSDimitry Andric                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
40250b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_2D_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4026349cc55cSDimitry Andricmulticlass SULD_2D<string inst, NVPTXRegClass outtype> {
4027349cc55cSDimitry Andric  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
4028349cc55cSDimitry Andric  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
4029349cc55cSDimitry Andric}
4030349cc55cSDimitry Andric
// Instantiations of SULD_2D for the "suld.b.2d" mnemonics: element suffixes
// .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the result.
4031349cc55cSDimitry Andricdefm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
4032349cc55cSDimitry Andricdefm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
4033349cc55cSDimitry Andricdefm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
4034349cc55cSDimitry Andricdefm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
4035349cc55cSDimitry Andric
4036349cc55cSDimitry Andricdefm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
4037349cc55cSDimitry Andricdefm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
4038349cc55cSDimitry Andricdefm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
4039349cc55cSDimitry Andricdefm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
4040349cc55cSDimitry Andric
4041349cc55cSDimitry Andricdefm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
4042349cc55cSDimitry Andricdefm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
4043349cc55cSDimitry Andricdefm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
4044349cc55cSDimitry Andricdefm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
4045349cc55cSDimitry Andric
// Base instruction record for single-result 2D-array surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the single result $r.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs inputs $l, $x and $y.
// The address group is printed with four elements, {$l, $x, $y, $y}: only
// three operands exist, so $y is printed twice to fill the last slot.
// The selection pattern list is empty ([]).
4046349cc55cSDimitry Andricclass SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
4047349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r),
4048349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4049349cc55cSDimitry Andric                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
40500b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_2D_ARRAY_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4051349cc55cSDimitry Andricmulticlass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
4052349cc55cSDimitry Andric  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
4053349cc55cSDimitry Andric  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
4054349cc55cSDimitry Andric}
4055349cc55cSDimitry Andric
// Instantiations of SULD_2D_ARRAY for the "suld.b.a2d" mnemonics: element
// suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the result.
4056349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
4057349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
4058349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
4059349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
4060349cc55cSDimitry Andric
4061349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
4062349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
4063349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
4064349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
4065349cc55cSDimitry Andric
4066349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
4067349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
4068349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
4069349cc55cSDimitry Andricdefm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
4070349cc55cSDimitry Andric
// Base instruction record for single-result 3D surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the single result $r.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs coordinates $x, $y and $z.
// The address group is printed with four elements, {$x, $y, $z, $z}: only
// three operands exist, so $z is printed twice to fill the last slot.
// The selection pattern list is empty ([]).
4071349cc55cSDimitry Andricclass SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
4072349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r),
4073349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4074349cc55cSDimitry Andric                inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
40750b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_3D_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4076349cc55cSDimitry Andricmulticlass SULD_3D<string inst, NVPTXRegClass outtype> {
4077349cc55cSDimitry Andric  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
4078349cc55cSDimitry Andric  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
4079349cc55cSDimitry Andric}
4080349cc55cSDimitry Andric
// Instantiations of SULD_3D for the "suld.b.3d" mnemonics: element suffixes
// .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the result.
4081349cc55cSDimitry Andricdefm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
4082349cc55cSDimitry Andricdefm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
4083349cc55cSDimitry Andricdefm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
4084349cc55cSDimitry Andricdefm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
4085349cc55cSDimitry Andric
4086349cc55cSDimitry Andricdefm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
4087349cc55cSDimitry Andricdefm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
4088349cc55cSDimitry Andricdefm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
4089349cc55cSDimitry Andricdefm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
4090349cc55cSDimitry Andric
4091349cc55cSDimitry Andricdefm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
4092349cc55cSDimitry Andricdefm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
4093349cc55cSDimitry Andricdefm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
4094349cc55cSDimitry Andricdefm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
40950b57cec5SDimitry Andric}
40960b57cec5SDimitry Andric
40970b57cec5SDimitry Andriclet IsSuld = 2 in {
40980b57cec5SDimitry Andric
// Base instruction record for two-result (v2) 1D surface loads; defined inside
// the enclosing `let IsSuld = 2` scope.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the two results $r, $g.
//   surf    - (ins ...) dag providing the surface operand $s, prepended to the
//             Int32Regs coordinate $x with !con.
// The selection pattern list is empty ([]).
4099349cc55cSDimitry Andricclass SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4100349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g),
4101349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x)),
4102349cc55cSDimitry Andric                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
41030b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_V2_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4104349cc55cSDimitry Andricmulticlass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
4105349cc55cSDimitry Andric  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4106349cc55cSDimitry Andric  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
4107349cc55cSDimitry Andric}
41080b57cec5SDimitry Andric
// Instantiations of SULD_1D_V2 for the "suld.b.1d.v2" mnemonics: element
// suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the results.
4109349cc55cSDimitry Andricdefm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
4110349cc55cSDimitry Andricdefm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
4111349cc55cSDimitry Andricdefm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
4112349cc55cSDimitry Andricdefm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
41130b57cec5SDimitry Andric
4114349cc55cSDimitry Andricdefm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
4115349cc55cSDimitry Andricdefm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
4116349cc55cSDimitry Andricdefm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
4117349cc55cSDimitry Andricdefm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
41180b57cec5SDimitry Andric
4119349cc55cSDimitry Andricdefm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
4120349cc55cSDimitry Andricdefm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
4121349cc55cSDimitry Andricdefm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
4122349cc55cSDimitry Andricdefm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
4123349cc55cSDimitry Andric
// Base instruction record for two-result (v2) 1D-array surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the two results $r, $g.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs inputs $l and $x.
// The selection pattern list is empty ([]).
4124349cc55cSDimitry Andricclass SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4125349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g),
4126349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4127349cc55cSDimitry Andric                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
41280b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_ARRAY_V2_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4129349cc55cSDimitry Andricmulticlass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4130349cc55cSDimitry Andric  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4131349cc55cSDimitry Andric  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
4132349cc55cSDimitry Andric}
4133349cc55cSDimitry Andric
// Instantiations of SULD_1D_ARRAY_V2 for the "suld.b.a1d.v2" mnemonics:
// element suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero
// suffixes. Both the .b8 and .b16 variants use Int16Regs for the results.
4134349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I8_CLAMP
4135349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
4136349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I16_CLAMP
4137349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
4138349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I32_CLAMP
4139349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
4140349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I64_CLAMP
4141349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
4142349cc55cSDimitry Andric
4143349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I8_TRAP
4144349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
4145349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I16_TRAP
4146349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
4147349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I32_TRAP
4148349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
4149349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I64_TRAP
4150349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
4151349cc55cSDimitry Andric
4152349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I8_ZERO
4153349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
4154349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I16_ZERO
4155349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
4156349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I32_ZERO
4157349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
4158349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V2I64_ZERO
4159349cc55cSDimitry Andric  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
4160349cc55cSDimitry Andric
// Base instruction record for two-result (v2) 2D surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the two results $r, $g.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs coordinates $x and $y.
// The selection pattern list is empty ([]).
4161349cc55cSDimitry Andricclass SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4162349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g),
4163349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4164349cc55cSDimitry Andric                inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
41650b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_2D_V2_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4166349cc55cSDimitry Andricmulticlass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
4167349cc55cSDimitry Andric  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4168349cc55cSDimitry Andric  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
4169349cc55cSDimitry Andric}
4170349cc55cSDimitry Andric
// Instantiations of SULD_2D_V2 for the "suld.b.2d.v2" mnemonics: element
// suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the results.
4171349cc55cSDimitry Andricdefm SULD_2D_V2I8_CLAMP
4172349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
4173349cc55cSDimitry Andricdefm SULD_2D_V2I16_CLAMP
4174349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
4175349cc55cSDimitry Andricdefm SULD_2D_V2I32_CLAMP
4176349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
4177349cc55cSDimitry Andricdefm SULD_2D_V2I64_CLAMP
4178349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
4179349cc55cSDimitry Andric
4180349cc55cSDimitry Andricdefm SULD_2D_V2I8_TRAP
4181349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
4182349cc55cSDimitry Andricdefm SULD_2D_V2I16_TRAP
4183349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
4184349cc55cSDimitry Andricdefm SULD_2D_V2I32_TRAP
4185349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
4186349cc55cSDimitry Andricdefm SULD_2D_V2I64_TRAP
4187349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
4188349cc55cSDimitry Andric
4189349cc55cSDimitry Andricdefm SULD_2D_V2I8_ZERO
4190349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
4191349cc55cSDimitry Andricdefm SULD_2D_V2I16_ZERO
4192349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
4193349cc55cSDimitry Andricdefm SULD_2D_V2I32_ZERO
4194349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
4195349cc55cSDimitry Andricdefm SULD_2D_V2I64_ZERO
4196349cc55cSDimitry Andric  : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
4197349cc55cSDimitry Andric
// Base instruction record for two-result (v2) 2D-array surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the two results $r, $g.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs inputs $l, $x and $y.
// The address group is printed with four elements, {$l, $x, $y, $y}: only
// three operands exist, so $y is printed twice to fill the last slot.
// The selection pattern list is empty ([]).
4198349cc55cSDimitry Andricclass SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4199349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g),
4200349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4201349cc55cSDimitry Andric                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
42020b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_2D_ARRAY_V2_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4203349cc55cSDimitry Andricmulticlass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
4204349cc55cSDimitry Andric  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4205349cc55cSDimitry Andric  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
4206349cc55cSDimitry Andric}
4207349cc55cSDimitry Andric
// Instantiations of SULD_2D_ARRAY_V2 for the "suld.b.a2d.v2" mnemonics:
// element suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero
// suffixes. Both the .b8 and .b16 variants use Int16Regs for the results.
4208349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I8_CLAMP
4209349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
4210349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I16_CLAMP
4211349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
4212349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I32_CLAMP
4213349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
4214349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I64_CLAMP
4215349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
4216349cc55cSDimitry Andric
4217349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I8_TRAP
4218349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
4219349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I16_TRAP
4220349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
4221349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I32_TRAP
4222349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
4223349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I64_TRAP
4224349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
4225349cc55cSDimitry Andric
4226349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I8_ZERO
4227349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
4228349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I16_ZERO
4229349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
4230349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I32_ZERO
4231349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
4232349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V2I64_ZERO
4233349cc55cSDimitry Andric  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
4234349cc55cSDimitry Andric
// Base instruction record for two-result (v2) 3D surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the two results $r, $g.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs coordinates $x, $y and $z.
// The address group is printed with four elements, {$x, $y, $z, $z}: only
// three operands exist, so $z is printed twice to fill the last slot.
// The selection pattern list is empty ([]).
4235349cc55cSDimitry Andricclass SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
4236349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g),
4237349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4238349cc55cSDimitry Andric                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
42390b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_3D_V2_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4240349cc55cSDimitry Andricmulticlass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
4241349cc55cSDimitry Andric  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
4242349cc55cSDimitry Andric  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
4243349cc55cSDimitry Andric}
4244349cc55cSDimitry Andric
// Instantiations of SULD_3D_V2 for the "suld.b.3d.v2" mnemonics: element
// suffixes .b8/.b16/.b32/.b64 crossed with the .clamp/.trap/.zero suffixes.
// Both the .b8 and .b16 variants use Int16Regs for the results.
4245349cc55cSDimitry Andricdefm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
4246349cc55cSDimitry Andricdefm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
4247349cc55cSDimitry Andricdefm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
4248349cc55cSDimitry Andricdefm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
4249349cc55cSDimitry Andric
4250349cc55cSDimitry Andricdefm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
4251349cc55cSDimitry Andricdefm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
4252349cc55cSDimitry Andricdefm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
4253349cc55cSDimitry Andricdefm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
4254349cc55cSDimitry Andric
4255349cc55cSDimitry Andricdefm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
4256349cc55cSDimitry Andricdefm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
4257349cc55cSDimitry Andricdefm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
4258349cc55cSDimitry Andricdefm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
4259349cc55cSDimitry Andric
42600b57cec5SDimitry Andric}
42610b57cec5SDimitry Andric
42620b57cec5SDimitry Andriclet IsSuld = 3 in {
42630b57cec5SDimitry Andric
// Base instruction record for four-result (v4) 1D surface loads; defined
// inside the enclosing `let IsSuld = 3` scope.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   surf    - (ins ...) dag providing the surface operand $s, prepended to the
//             Int32Regs coordinate $x with !con.
// The selection pattern list is empty ([]).
4264349cc55cSDimitry Andricclass SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4265349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4266349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x)),
4267349cc55cSDimitry Andric                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
42680b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_V4_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4269349cc55cSDimitry Andricmulticlass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
4270349cc55cSDimitry Andric  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4271349cc55cSDimitry Andric  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
42720b57cec5SDimitry Andric}
42730b57cec5SDimitry Andric
// Instantiations of SULD_1D_V4 for the "suld.b.1d.v4" mnemonics: element
// suffixes .b8/.b16/.b32 crossed with the .clamp/.trap/.zero suffixes.
// Unlike the scalar and v2 families, no .b64 variant is defined here.
// Both the .b8 and .b16 variants use Int16Regs for the results.
4274349cc55cSDimitry Andricdefm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
4275349cc55cSDimitry Andricdefm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
4276349cc55cSDimitry Andricdefm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
42770b57cec5SDimitry Andric
4278349cc55cSDimitry Andricdefm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
4279349cc55cSDimitry Andricdefm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
4280349cc55cSDimitry Andricdefm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
42810b57cec5SDimitry Andric
4282349cc55cSDimitry Andricdefm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
4283349cc55cSDimitry Andricdefm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
4284349cc55cSDimitry Andricdefm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
42850b57cec5SDimitry Andric
// Base instruction record for four-result (v4) 1D-array surface loads.
//   inst    - mnemonic text placed at the start of the asm string.
//   outtype - register class of the four results $r, $g, $b, $a.
//   surf    - (ins ...) dag providing the surface operand $s, prepended with
//             !con to the Int32Regs inputs $l and $x.
// The selection pattern list is empty ([]).
4286349cc55cSDimitry Andricclass SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4287349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4288349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
4289349cc55cSDimitry Andric                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
42900b57cec5SDimitry Andric                []>;
// Emits the two surface-operand variants of SULD_1D_ARRAY_V4_base:
//   _R : $s is an Int64Regs operand.
//   _I : $s is an i64imm operand.
4291349cc55cSDimitry Andricmulticlass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4292349cc55cSDimitry Andric  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4293349cc55cSDimitry Andric  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
42940b57cec5SDimitry Andric}
42950b57cec5SDimitry Andric
// Instantiations of SULD_1D_ARRAY_V4 for the "suld.b.a1d.v4" mnemonics:
// element suffixes .b8/.b16/.b32 crossed with the .clamp/.trap/.zero suffixes.
// No .b64 variant is defined here. Both the .b8 and .b16 variants use
// Int16Regs for the results.
4296349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I8_CLAMP
4297349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
4298349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I16_CLAMP
4299349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
4300349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I32_CLAMP
4301349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
43020b57cec5SDimitry Andric
4303349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I8_TRAP
4304349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
4305349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I16_TRAP
4306349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
4307349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I32_TRAP
4308349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
43090b57cec5SDimitry Andric
4310349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I8_ZERO
4311349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
4312349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I16_ZERO
4313349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
4314349cc55cSDimitry Andricdefm SULD_1D_ARRAY_V4I32_ZERO
4315349cc55cSDimitry Andric  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
43160b57cec5SDimitry Andric
// Common instruction shape for the four-result "2d" suld records.
//   inst    - mnemonic text; it forms the start of the assembly string.
//   outtype - register class of the four results $r, $g, $b and $a.
//   surf    - dag that supplies the $s operand; !con appends the two
//             Int32Regs operands $x and $y to it.
// The address is printed as [$s, {$x, $y}]. The pattern list is empty.
4317349cc55cSDimitry Andricclass SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4318349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4319349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
4320349cc55cSDimitry Andric                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
43210b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4322349cc55cSDimitry Andricmulticlass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
4323349cc55cSDimitry Andric  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4324349cc55cSDimitry Andric  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
43250b57cec5SDimitry Andric}
43260b57cec5SDimitry Andric
// Instantiations of SULD_2D_V4, each yielding an _R and an _I record.
// The b8 and b16 mnemonics both use Int16Regs for the results; the b32
// mnemonics use Int32Regs. Grouped by mnemonic suffix: .clamp, .trap, .zero.
4327349cc55cSDimitry Andricdefm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
4328349cc55cSDimitry Andricdefm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
4329349cc55cSDimitry Andricdefm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
43300b57cec5SDimitry Andric
4331349cc55cSDimitry Andricdefm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
4332349cc55cSDimitry Andricdefm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
4333349cc55cSDimitry Andricdefm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
43340b57cec5SDimitry Andric
4335349cc55cSDimitry Andricdefm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
4336349cc55cSDimitry Andricdefm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
4337349cc55cSDimitry Andricdefm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
43380b57cec5SDimitry Andric
// Common instruction shape for the four-result "a2d" suld records.
//   inst    - mnemonic text; it forms the start of the assembly string.
//   outtype - register class of the four results $r, $g, $b and $a.
//   surf    - dag that supplies the $s operand; !con appends the three
//             Int32Regs operands $l, $x and $y to it.
// The address is printed with a four-element vector, {$l, $x, $y, $y}: only
// three operands exist, so $y is printed a second time in the last slot.
// NOTE(review): presumably the last slot is padding that the instruction
// ignores -- confirm against the PTX ISA description of suld with .a2d.
// The pattern list is empty.
4339349cc55cSDimitry Andricclass SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4340349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4341349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
4342349cc55cSDimitry Andric                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
43430b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4344349cc55cSDimitry Andricmulticlass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
4345349cc55cSDimitry Andric  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4346349cc55cSDimitry Andric  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
43470b57cec5SDimitry Andric}
43480b57cec5SDimitry Andric
// Instantiations of SULD_2D_ARRAY_V4, each yielding an _R and an _I record.
// The b8 and b16 mnemonics both use Int16Regs for the results; the b32
// mnemonics use Int32Regs. Grouped by mnemonic suffix: .clamp, .trap, .zero.
4349349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I8_CLAMP
4350349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
4351349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I16_CLAMP
4352349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
4353349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I32_CLAMP
4354349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
43550b57cec5SDimitry Andric
4356349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I8_TRAP
4357349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
4358349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I16_TRAP
4359349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
4360349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I32_TRAP
4361349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
43620b57cec5SDimitry Andric
4363349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I8_ZERO
4364349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
4365349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I16_ZERO
4366349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
4367349cc55cSDimitry Andricdefm SULD_2D_ARRAY_V4I32_ZERO
4368349cc55cSDimitry Andric  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
43690b57cec5SDimitry Andric
// Common instruction shape for the four-result "3d" suld records.
//   inst    - mnemonic text; it forms the start of the assembly string.
//   outtype - register class of the four results $r, $g, $b and $a.
//   surf    - dag that supplies the $s operand; !con appends the three
//             Int32Regs operands $x, $y and $z to it.
// The address is printed with a four-element vector, {$x, $y, $z, $z}: only
// three operands exist, so $z is printed a second time in the last slot.
// NOTE(review): presumably the last slot is padding that the instruction
// ignores -- confirm against the PTX ISA description of suld with .3d.
// The pattern list is empty.
4370349cc55cSDimitry Andricclass SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
4371349cc55cSDimitry Andric    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
4372349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
4373349cc55cSDimitry Andric                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
43740b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4375349cc55cSDimitry Andricmulticlass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
4376349cc55cSDimitry Andric  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
4377349cc55cSDimitry Andric  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
43780b57cec5SDimitry Andric}
43790b57cec5SDimitry Andric
// Instantiations of SULD_3D_V4, each yielding an _R and an _I record.
// The b8 and b16 mnemonics both use Int16Regs for the results; the b32
// mnemonics use Int32Regs. Grouped by mnemonic suffix: .clamp, .trap, .zero.
4380349cc55cSDimitry Andricdefm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
4381349cc55cSDimitry Andricdefm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
4382349cc55cSDimitry Andricdefm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
43830b57cec5SDimitry Andric
4384349cc55cSDimitry Andricdefm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
4385349cc55cSDimitry Andricdefm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
4386349cc55cSDimitry Andricdefm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
43870b57cec5SDimitry Andric
4388349cc55cSDimitry Andricdefm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
4389349cc55cSDimitry Andricdefm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
4390349cc55cSDimitry Andricdefm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
43910b57cec5SDimitry Andric
43920b57cec5SDimitry Andric}
43930b57cec5SDimitry Andric
43940b57cec5SDimitry Andric//-----------------------------------
43950b57cec5SDimitry Andric// Texture Query Intrinsics
43960b57cec5SDimitry Andric//-----------------------------------
43970b57cec5SDimitry Andric
// Texture query instruction records. Every record inside this let block has
// IsSurfTexQuery set to true. Each query is defined as a pair: the _R record
// takes the operand $a as Int64Regs and the _I record takes it as i64imm.
// Both write an Int32Regs result $d, print "txq.<query>.b32 \t$d, [$a];" and
// have an empty pattern list. Queries defined: channel_order,
// channel_data_type, width, height, depth, array_size, num_samples and
// num_mipmap_levels.
4398e8d8bef9SDimitry Andriclet IsSurfTexQuery = true in {
4399349cc55cSDimitry Andricdef TXQ_CHANNEL_ORDER_R
44000b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44010b57cec5SDimitry Andric              "txq.channel_order.b32 \t$d, [$a];",
44020b57cec5SDimitry Andric              []>;
4403349cc55cSDimitry Andricdef TXQ_CHANNEL_ORDER_I
4404349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4405349cc55cSDimitry Andric              "txq.channel_order.b32 \t$d, [$a];",
4406349cc55cSDimitry Andric              []>;
4407349cc55cSDimitry Andricdef TXQ_CHANNEL_DATA_TYPE_R
44080b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44090b57cec5SDimitry Andric              "txq.channel_data_type.b32 \t$d, [$a];",
44100b57cec5SDimitry Andric              []>;
4411349cc55cSDimitry Andricdef TXQ_CHANNEL_DATA_TYPE_I
4412349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4413349cc55cSDimitry Andric              "txq.channel_data_type.b32 \t$d, [$a];",
4414349cc55cSDimitry Andric              []>;
4415349cc55cSDimitry Andricdef TXQ_WIDTH_R
44160b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44170b57cec5SDimitry Andric              "txq.width.b32 \t$d, [$a];",
44180b57cec5SDimitry Andric              []>;
4419349cc55cSDimitry Andricdef TXQ_WIDTH_I
4420349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4421349cc55cSDimitry Andric              "txq.width.b32 \t$d, [$a];",
4422349cc55cSDimitry Andric              []>;
4423349cc55cSDimitry Andricdef TXQ_HEIGHT_R
44240b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44250b57cec5SDimitry Andric              "txq.height.b32 \t$d, [$a];",
44260b57cec5SDimitry Andric              []>;
4427349cc55cSDimitry Andricdef TXQ_HEIGHT_I
4428349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4429349cc55cSDimitry Andric              "txq.height.b32 \t$d, [$a];",
4430349cc55cSDimitry Andric              []>;
4431349cc55cSDimitry Andricdef TXQ_DEPTH_R
44320b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44330b57cec5SDimitry Andric              "txq.depth.b32 \t$d, [$a];",
44340b57cec5SDimitry Andric              []>;
4435349cc55cSDimitry Andricdef TXQ_DEPTH_I
4436349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4437349cc55cSDimitry Andric              "txq.depth.b32 \t$d, [$a];",
4438349cc55cSDimitry Andric              []>;
4439349cc55cSDimitry Andricdef TXQ_ARRAY_SIZE_R
44400b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44410b57cec5SDimitry Andric              "txq.array_size.b32 \t$d, [$a];",
44420b57cec5SDimitry Andric              []>;
4443349cc55cSDimitry Andricdef TXQ_ARRAY_SIZE_I
4444349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4445349cc55cSDimitry Andric              "txq.array_size.b32 \t$d, [$a];",
4446349cc55cSDimitry Andric              []>;
4447349cc55cSDimitry Andricdef TXQ_NUM_SAMPLES_R
44480b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44490b57cec5SDimitry Andric              "txq.num_samples.b32 \t$d, [$a];",
44500b57cec5SDimitry Andric              []>;
4451349cc55cSDimitry Andricdef TXQ_NUM_SAMPLES_I
4452349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4453349cc55cSDimitry Andric              "txq.num_samples.b32 \t$d, [$a];",
4454349cc55cSDimitry Andric              []>;
4455349cc55cSDimitry Andricdef TXQ_NUM_MIPMAP_LEVELS_R
44560b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44570b57cec5SDimitry Andric              "txq.num_mipmap_levels.b32 \t$d, [$a];",
44580b57cec5SDimitry Andric              []>;
4459349cc55cSDimitry Andricdef TXQ_NUM_MIPMAP_LEVELS_I
4460349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4461349cc55cSDimitry Andric              "txq.num_mipmap_levels.b32 \t$d, [$a];",
4462349cc55cSDimitry Andric              []>;
44630b57cec5SDimitry Andric}
44640b57cec5SDimitry Andric
// Selection patterns for the texture query intrinsics: each int_nvvm_txq_*
// intrinsic applied to an Int64Regs operand $a is rewritten to the TXQ_*_R
// record of the same query. None of these patterns targets a TXQ_*_I record.
44650b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
4466349cc55cSDimitry Andric          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
44670b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
4468349cc55cSDimitry Andric          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
44690b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_width Int64Regs:$a),
4470349cc55cSDimitry Andric          (TXQ_WIDTH_R Int64Regs:$a)>;
44710b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_height Int64Regs:$a),
4472349cc55cSDimitry Andric          (TXQ_HEIGHT_R Int64Regs:$a)>;
44730b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_depth Int64Regs:$a),
4474349cc55cSDimitry Andric          (TXQ_DEPTH_R Int64Regs:$a)>;
44750b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
4476349cc55cSDimitry Andric          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
44770b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
4478349cc55cSDimitry Andric          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
44790b57cec5SDimitry Andricdef : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
4480349cc55cSDimitry Andric          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
44810b57cec5SDimitry Andric
44820b57cec5SDimitry Andric
44830b57cec5SDimitry Andric//-----------------------------------
44840b57cec5SDimitry Andric// Surface Query Intrinsics
44850b57cec5SDimitry Andric//-----------------------------------
44860b57cec5SDimitry Andric
// Surface query instruction records. Every record inside this let block has
// IsSurfTexQuery set to true. Each query is defined as a pair: the _R record
// takes the operand $a as Int64Regs and the _I record takes it as i64imm.
// Both write an Int32Regs result $d, print "suq.<query>.b32 \t$d, [$a];" and
// have an empty pattern list. Queries defined: channel_order,
// channel_data_type, width, height, depth and array_size.
4487e8d8bef9SDimitry Andriclet IsSurfTexQuery = true in {
4488349cc55cSDimitry Andricdef SUQ_CHANNEL_ORDER_R
44890b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44900b57cec5SDimitry Andric              "suq.channel_order.b32 \t$d, [$a];",
44910b57cec5SDimitry Andric              []>;
4492349cc55cSDimitry Andricdef SUQ_CHANNEL_ORDER_I
4493349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4494349cc55cSDimitry Andric              "suq.channel_order.b32 \t$d, [$a];",
4495349cc55cSDimitry Andric              []>;
4496349cc55cSDimitry Andricdef SUQ_CHANNEL_DATA_TYPE_R
44970b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
44980b57cec5SDimitry Andric              "suq.channel_data_type.b32 \t$d, [$a];",
44990b57cec5SDimitry Andric              []>;
4500349cc55cSDimitry Andricdef SUQ_CHANNEL_DATA_TYPE_I
4501349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4502349cc55cSDimitry Andric              "suq.channel_data_type.b32 \t$d, [$a];",
4503349cc55cSDimitry Andric              []>;
4504349cc55cSDimitry Andricdef SUQ_WIDTH_R
45050b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
45060b57cec5SDimitry Andric              "suq.width.b32 \t$d, [$a];",
45070b57cec5SDimitry Andric              []>;
4508349cc55cSDimitry Andricdef SUQ_WIDTH_I
4509349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4510349cc55cSDimitry Andric              "suq.width.b32 \t$d, [$a];",
4511349cc55cSDimitry Andric              []>;
4512349cc55cSDimitry Andricdef SUQ_HEIGHT_R
45130b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
45140b57cec5SDimitry Andric              "suq.height.b32 \t$d, [$a];",
45150b57cec5SDimitry Andric              []>;
4516349cc55cSDimitry Andricdef SUQ_HEIGHT_I
4517349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4518349cc55cSDimitry Andric              "suq.height.b32 \t$d, [$a];",
4519349cc55cSDimitry Andric              []>;
4520349cc55cSDimitry Andricdef SUQ_DEPTH_R
45210b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
45220b57cec5SDimitry Andric              "suq.depth.b32 \t$d, [$a];",
45230b57cec5SDimitry Andric              []>;
4524349cc55cSDimitry Andricdef SUQ_DEPTH_I
4525349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4526349cc55cSDimitry Andric              "suq.depth.b32 \t$d, [$a];",
4527349cc55cSDimitry Andric              []>;
4528349cc55cSDimitry Andricdef SUQ_ARRAY_SIZE_R
45290b57cec5SDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
45300b57cec5SDimitry Andric              "suq.array_size.b32 \t$d, [$a];",
45310b57cec5SDimitry Andric              []>;
4532349cc55cSDimitry Andricdef SUQ_ARRAY_SIZE_I
4533349cc55cSDimitry Andric  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
4534349cc55cSDimitry Andric              "suq.array_size.b32 \t$d, [$a];",
4535349cc55cSDimitry Andric              []>;
45360b57cec5SDimitry Andric}
45370b57cec5SDimitry Andric
// Selection patterns for the surface query intrinsics: each int_nvvm_suq_*
// intrinsic applied to an Int64Regs operand $a is rewritten to the SUQ_*_R
// record of the same query. None of these patterns targets a SUQ_*_I record.
45380b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
4539349cc55cSDimitry Andric          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
45400b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
4541349cc55cSDimitry Andric          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
45420b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_width Int64Regs:$a),
4543349cc55cSDimitry Andric          (SUQ_WIDTH_R Int64Regs:$a)>;
45440b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_height Int64Regs:$a),
4545349cc55cSDimitry Andric          (SUQ_HEIGHT_R Int64Regs:$a)>;
45460b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_depth Int64Regs:$a),
4547349cc55cSDimitry Andric          (SUQ_DEPTH_R Int64Regs:$a)>;
45480b57cec5SDimitry Andricdef : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
4549349cc55cSDimitry Andric          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
45500b57cec5SDimitry Andric
45510b57cec5SDimitry Andric
45520b57cec5SDimitry Andric//===- Handle Query -------------------------------------------------------===//
45530b57cec5SDimitry Andric
45540b57cec5SDimitry Andric// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Three handle-type test records. Each takes an Int64Regs operand $a, writes
// an Int1Regs result $d and prints "istypep.<kind> \t$d, $a;" where <kind> is
// samplerref, surfref or texref. Unlike the query records above, each one
// carries its selection pattern inline: $d is set from the matching
// int_nvvm_istypep_{sampler,surface,texture} intrinsic.
45550b57cec5SDimitry Andricdef ISTYPEP_SAMPLER
45560b57cec5SDimitry Andric  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
45570b57cec5SDimitry Andric              "istypep.samplerref \t$d, $a;",
45580b57cec5SDimitry Andric              [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
45590b57cec5SDimitry Andricdef ISTYPEP_SURFACE
45600b57cec5SDimitry Andric  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
45610b57cec5SDimitry Andric              "istypep.surfref \t$d, $a;",
45620b57cec5SDimitry Andric              [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
45630b57cec5SDimitry Andricdef ISTYPEP_TEXTURE
45640b57cec5SDimitry Andric  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
45650b57cec5SDimitry Andric              "istypep.texref \t$d, $a;",
45660b57cec5SDimitry Andric              [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
45670b57cec5SDimitry Andric
45680b57cec5SDimitry Andric//===- Surface Stores -----------------------------------------------------===//
45690b57cec5SDimitry Andric
4570e8d8bef9SDimitry Andriclet IsSust = true in {
45710b57cec5SDimitry Andric
// Common instruction shape for the single-value "1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored value $r.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operand $x and the value $r to it.
// There are no outputs. The assembly string prints the address [$s, {$x}]
// followed by the braced value {$r}. The pattern list is empty.
4572349cc55cSDimitry Andricclass SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
4573349cc55cSDimitry Andric    : NVPTXInst<(outs),
4574349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, intype:$r)),
4575349cc55cSDimitry Andric                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
4576349cc55cSDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4577349cc55cSDimitry Andricmulticlass SUST_1D<string inst, NVPTXRegClass intype> {
4578349cc55cSDimitry Andric  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
4579349cc55cSDimitry Andric  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
4580349cc55cSDimitry Andric}
45810b57cec5SDimitry Andric
// Instantiations of SUST_1D, each yielding an _R and an _I record.
// The "sust.b" mnemonics come in b8/b16/b32/b64 widths for each of .clamp,
// .trap and .zero; the "sust.p" mnemonics are instantiated only for
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs for the value, b32
// uses Int32Regs and b64 uses Int64Regs.
4582349cc55cSDimitry Andricdefm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
4583349cc55cSDimitry Andricdefm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
4584349cc55cSDimitry Andricdefm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
4585349cc55cSDimitry Andricdefm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
45860b57cec5SDimitry Andric
4587349cc55cSDimitry Andricdefm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
4588349cc55cSDimitry Andricdefm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
4589349cc55cSDimitry Andricdefm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
4590349cc55cSDimitry Andricdefm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
45910b57cec5SDimitry Andric
4592349cc55cSDimitry Andricdefm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
4593349cc55cSDimitry Andricdefm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
4594349cc55cSDimitry Andricdefm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
4595349cc55cSDimitry Andricdefm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
45960b57cec5SDimitry Andric
4597349cc55cSDimitry Andricdefm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
4598349cc55cSDimitry Andricdefm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
4599349cc55cSDimitry Andricdefm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
46000b57cec5SDimitry Andric
// Common instruction shape for the two-value "1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored values $r and $g.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operand $x and the two values to it.
// There are no outputs. The assembly string prints the address [$s, {$x}]
// followed by the braced values {$r, $g}. The pattern list is empty.
4601349cc55cSDimitry Andricclass SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
46020b57cec5SDimitry Andric    : NVPTXInst<(outs),
4603349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
4604349cc55cSDimitry Andric                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
46050b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4606349cc55cSDimitry Andricmulticlass SUST_1D_V2<string inst, NVPTXRegClass intype> {
4607349cc55cSDimitry Andric  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4608349cc55cSDimitry Andric  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
4609349cc55cSDimitry Andric}
46100b57cec5SDimitry Andric
// Instantiations of SUST_1D_V2, each yielding an _R and an _I record.
// The "sust.b" mnemonics come in b8/b16/b32/b64 widths for each of .clamp,
// .trap and .zero; the "sust.p" mnemonics are instantiated only for
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs for the values, b32
// uses Int32Regs and b64 uses Int64Regs.
4611349cc55cSDimitry Andricdefm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
4612349cc55cSDimitry Andricdefm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
4613349cc55cSDimitry Andricdefm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
4614349cc55cSDimitry Andricdefm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
46150b57cec5SDimitry Andric
4616349cc55cSDimitry Andricdefm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
4617349cc55cSDimitry Andricdefm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
4618349cc55cSDimitry Andricdefm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
4619349cc55cSDimitry Andricdefm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
46200b57cec5SDimitry Andric
4621349cc55cSDimitry Andricdefm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
4622349cc55cSDimitry Andricdefm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
4623349cc55cSDimitry Andricdefm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
4624349cc55cSDimitry Andricdefm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
46250b57cec5SDimitry Andric
4626349cc55cSDimitry Andricdefm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
4627349cc55cSDimitry Andricdefm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
4628349cc55cSDimitry Andricdefm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
46290b57cec5SDimitry Andric
// Common instruction shape for the four-value "1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored values $r, $g, $b and $a.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operand $x and the four values to it.
// There are no outputs. The assembly string prints the address [$s, {$x}]
// followed by the braced values {$r, $g, $b, $a}. The pattern list is empty.
4630349cc55cSDimitry Andricclass SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
4631349cc55cSDimitry Andric    : NVPTXInst<(outs),
4632349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
4633349cc55cSDimitry Andric                                intype:$b, intype:$a)),
4634349cc55cSDimitry Andric                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
4635349cc55cSDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4636349cc55cSDimitry Andricmulticlass SUST_1D_V4<string inst, NVPTXRegClass intype> {
4637349cc55cSDimitry Andric  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4638349cc55cSDimitry Andric  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
4639349cc55cSDimitry Andric}
46400b57cec5SDimitry Andric
// Instantiations of SUST_1D_V4, each yielding an _R and an _I record.
// Only b8/b16/b32 widths are instantiated (no b64 form appears for v4). The
// "sust.b" mnemonics cover .clamp, .trap and .zero; the "sust.p" mnemonics
// cover .trap only. b8 and b16 both use Int16Regs for the values, b32 uses
// Int32Regs.
4641349cc55cSDimitry Andricdefm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
4642349cc55cSDimitry Andricdefm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4643349cc55cSDimitry Andricdefm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
46440b57cec5SDimitry Andric
4645349cc55cSDimitry Andricdefm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
4646349cc55cSDimitry Andricdefm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
4647349cc55cSDimitry Andricdefm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
46480b57cec5SDimitry Andric
4649349cc55cSDimitry Andricdefm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
4650349cc55cSDimitry Andricdefm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
4651349cc55cSDimitry Andricdefm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
46520b57cec5SDimitry Andric
4653349cc55cSDimitry Andricdefm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
4654349cc55cSDimitry Andricdefm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
4655349cc55cSDimitry Andricdefm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
46560b57cec5SDimitry Andric
// Common instruction shape for the single-value "a1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored value $r.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operands $idx and $x and the value $r to it.
// There are no outputs. The assembly string prints the address
// [$s, {$idx, $x}] followed by the braced value {$r}. The pattern list is
// empty.
4657349cc55cSDimitry Andricclass SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
46580b57cec5SDimitry Andric    : NVPTXInst<(outs),
4659349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
4660349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
46610b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4662349cc55cSDimitry Andricmulticlass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
4663349cc55cSDimitry Andric  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4664349cc55cSDimitry Andric  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4665349cc55cSDimitry Andric}
46660b57cec5SDimitry Andric
// Instantiations of SUST_1D_ARRAY, each yielding an _R and an _I record.
// The "sust.b" mnemonics come in b8/b16/b32/b64 widths for each of .clamp,
// .trap and .zero; the "sust.p" mnemonics are instantiated only for
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs for the value, b32
// uses Int32Regs and b64 uses Int64Regs.
4667349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B8_CLAMP
4668349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
4669349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B16_CLAMP
4670349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
4671349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B32_CLAMP
4672349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
4673349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B64_CLAMP
4674349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
46750b57cec5SDimitry Andric
4676349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B8_TRAP
4677349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
4678349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B16_TRAP
4679349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
4680349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B32_TRAP
4681349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
4682349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B64_TRAP
4683349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
46840b57cec5SDimitry Andric
4685349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B8_ZERO
4686349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
4687349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B16_ZERO
4688349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
4689349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B32_ZERO
4690349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
4691349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_B64_ZERO
4692349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
46930b57cec5SDimitry Andric
4694349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_B8_TRAP
4695349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
4696349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_B16_TRAP
4697349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
4698349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_B32_TRAP
4699349cc55cSDimitry Andric  : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
47000b57cec5SDimitry Andric
// Common instruction shape for the two-value "a1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored values $r and $g.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operands $idx and $x and the two values to it.
// There are no outputs. The assembly string prints the address
// [$s, {$idx, $x}] followed by the braced values {$r, $g}. The pattern list
// is empty.
4701349cc55cSDimitry Andricclass SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
4702349cc55cSDimitry Andric    : NVPTXInst<(outs),
4703349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4704349cc55cSDimitry Andric                                intype:$r, intype:$g)),
4705349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
4706349cc55cSDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4707349cc55cSDimitry Andricmulticlass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4708349cc55cSDimitry Andric  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4709349cc55cSDimitry Andric  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4710349cc55cSDimitry Andric}
47110b57cec5SDimitry Andric
// Instantiations of SUST_1D_ARRAY_V2, each yielding an _R and an _I record.
// The "sust.b" mnemonics come in b8/b16/b32/b64 widths for each of .clamp,
// .trap and .zero; the "sust.p" mnemonics are instantiated only for
// b8/b16/b32 with .trap. b8 and b16 both use Int16Regs for the values, b32
// uses Int32Regs and b64 uses Int64Regs.
4712349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B8_CLAMP
4713349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
4714349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B16_CLAMP
4715349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
4716349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B32_CLAMP
4717349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
4718349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B64_CLAMP
4719349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
47200b57cec5SDimitry Andric
4721349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B8_TRAP
4722349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
4723349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B16_TRAP
4724349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
4725349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B32_TRAP
4726349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
4727349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B64_TRAP
4728349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
47290b57cec5SDimitry Andric
4730349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B8_ZERO
4731349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
4732349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B16_ZERO
4733349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
4734349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B32_ZERO
4735349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
4736349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V2B64_ZERO
4737349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
47380b57cec5SDimitry Andric
4739349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V2B8_TRAP
4740349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
4741349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V2B16_TRAP
4742349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
4743349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V2B32_TRAP
4744349cc55cSDimitry Andric  : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
47450b57cec5SDimitry Andric
// Common instruction shape for the four-value "a1d" sust records.
//   inst   - mnemonic text; it forms the start of the assembly string.
//   intype - register class of the stored values $r, $g, $b and $a.
//   surf   - dag that supplies the $s operand; !con appends the Int32Regs
//            operands $idx and $x and the four values to it.
// There are no outputs. The assembly string prints the address
// [$s, {$idx, $x}] followed by the braced values {$r, $g, $b, $a}. The
// pattern list is empty.
4746349cc55cSDimitry Andricclass SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
47470b57cec5SDimitry Andric    : NVPTXInst<(outs),
4748349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
4749349cc55cSDimitry Andric                                intype:$r, intype:$g, intype:$b, intype:$a)),
4750349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
47510b57cec5SDimitry Andric                []>;
// Defines two records per instantiation: the _R record takes $s as
// Int64Regs, the _I record takes $s as i64imm.
4752349cc55cSDimitry Andricmulticlass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4753349cc55cSDimitry Andric  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4754349cc55cSDimitry Andric  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4755349cc55cSDimitry Andric}
47560b57cec5SDimitry Andric
// Instantiations of SUST_1D_ARRAY_V4, each yielding an _R and an _I record.
// Only b8/b16/b32 widths are instantiated (no b64 form appears for v4). The
// "sust.b" mnemonics cover .clamp, .trap and .zero; the "sust.p" mnemonics
// cover .trap only. b8 and b16 both use Int16Regs for the values, b32 uses
// Int32Regs.
4757349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B8_CLAMP
4758349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
4759349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B16_CLAMP
4760349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
4761349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B32_CLAMP
4762349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
47630b57cec5SDimitry Andric
4764349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B8_TRAP
4765349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
4766349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B16_TRAP
4767349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
4768349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B32_TRAP
4769349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
47700b57cec5SDimitry Andric
4771349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B8_ZERO
4772349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
4773349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B16_ZERO
4774349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
4775349cc55cSDimitry Andricdefm SUST_B_1D_ARRAY_V4B32_ZERO
4776349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
47770b57cec5SDimitry Andric
4778349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V4B8_TRAP
4779349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
4780349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V4B16_TRAP
4781349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
4782349cc55cSDimitry Andricdefm SUST_P_1D_ARRAY_V4B32_TRAP
4783349cc55cSDimitry Andric  : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
47840b57cec5SDimitry Andric
// Base record for single-component 2D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the stored value $r.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x and $y, then $r; printed as:
//   inst [$s, {$x, $y}], {$r};
4785349cc55cSDimitry Andricclass SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
4786349cc55cSDimitry Andric    : NVPTXInst<(outs),
4787349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
4788349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
4789349cc55cSDimitry Andric                []>;
// Defines the two surface-operand flavours of a single-component 2D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_base.
4790349cc55cSDimitry Andricmulticlass SUST_2D<string inst, NVPTXRegClass intype> {
4791349cc55cSDimitry Andric  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
4792349cc55cSDimitry Andric  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
4793349cc55cSDimitry Andric}
47940b57cec5SDimitry Andric
// Single-component 2D stores. Each defm expands to <name>_R and <name>_I via
// SUST_2D. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs and b64
// uses Int64Regs. sust.b is provided in .clamp, .trap and .zero forms for all
// four widths; sust.p only in .trap and without a b64 entry.
4795349cc55cSDimitry Andricdefm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
4796349cc55cSDimitry Andricdefm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
4797349cc55cSDimitry Andricdefm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
4798349cc55cSDimitry Andricdefm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
47990b57cec5SDimitry Andric
4800349cc55cSDimitry Andricdefm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
4801349cc55cSDimitry Andricdefm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
4802349cc55cSDimitry Andricdefm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
4803349cc55cSDimitry Andricdefm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
48040b57cec5SDimitry Andric
4805349cc55cSDimitry Andricdefm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
4806349cc55cSDimitry Andricdefm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
4807349cc55cSDimitry Andricdefm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
4808349cc55cSDimitry Andricdefm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
48090b57cec5SDimitry Andric
4810349cc55cSDimitry Andricdefm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
4811349cc55cSDimitry Andricdefm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
4812349cc55cSDimitry Andricdefm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
48130b57cec5SDimitry Andric
// Base record for two-component 2D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the two stored values $r and $g.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x and $y, then $r and $g; printed as:
//   inst [$s, {$x, $y}], {$r, $g};
4814349cc55cSDimitry Andricclass SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
48150b57cec5SDimitry Andric    : NVPTXInst<(outs),
4816349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4817349cc55cSDimitry Andric                                intype:$r, intype:$g)),
4818349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
48190b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a two-component 2D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_V2_base.
4820349cc55cSDimitry Andricmulticlass SUST_2D_V2<string inst, NVPTXRegClass intype> {
4821349cc55cSDimitry Andric  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
4822349cc55cSDimitry Andric  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
4823349cc55cSDimitry Andric}
48240b57cec5SDimitry Andric
// Two-component (.v2) 2D stores. Each defm expands to <name>_R and <name>_I
// via SUST_2D_V2. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs
// and b64 uses Int64Regs. sust.b is provided in .clamp, .trap and .zero forms
// for all four widths; sust.p only in .trap and without a b64 entry.
4825349cc55cSDimitry Andricdefm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
4826349cc55cSDimitry Andricdefm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
4827349cc55cSDimitry Andricdefm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
4828349cc55cSDimitry Andricdefm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
48290b57cec5SDimitry Andric
4830349cc55cSDimitry Andricdefm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
4831349cc55cSDimitry Andricdefm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
4832349cc55cSDimitry Andricdefm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
4833349cc55cSDimitry Andricdefm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
48340b57cec5SDimitry Andric
4835349cc55cSDimitry Andricdefm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
4836349cc55cSDimitry Andricdefm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
4837349cc55cSDimitry Andricdefm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
4838349cc55cSDimitry Andricdefm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
48390b57cec5SDimitry Andric
4840349cc55cSDimitry Andricdefm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
4841349cc55cSDimitry Andricdefm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
4842349cc55cSDimitry Andricdefm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
4843349cc55cSDimitry Andric
// Base record for four-component 2D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the four stored values $r, $g, $b, $a.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x and $y, then the four values; printed as:
//   inst [$s, {$x, $y}], {$r, $g, $b, $a};
4844349cc55cSDimitry Andricclass SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
48450b57cec5SDimitry Andric    : NVPTXInst<(outs),
4846349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
4847349cc55cSDimitry Andric                                intype:$r, intype:$g, intype:$b, intype:$a)),
4848349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
48490b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a four-component 2D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_V4_base.
4850349cc55cSDimitry Andricmulticlass SUST_2D_V4<string inst, NVPTXRegClass intype> {
4851349cc55cSDimitry Andric  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
4852349cc55cSDimitry Andric  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
4853349cc55cSDimitry Andric}
4854349cc55cSDimitry Andric
// Four-component (.v4) 2D stores. Each defm expands to <name>_R and <name>_I
// via SUST_2D_V4. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs;
// no b64 variants are instantiated. sust.b is provided in .clamp, .trap and
// .zero forms, sust.p only in .trap.
4855349cc55cSDimitry Andricdefm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
4856349cc55cSDimitry Andricdefm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
4857349cc55cSDimitry Andricdefm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
4858349cc55cSDimitry Andric
4859349cc55cSDimitry Andricdefm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
4860349cc55cSDimitry Andricdefm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
4861349cc55cSDimitry Andricdefm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
4862349cc55cSDimitry Andric
4863349cc55cSDimitry Andricdefm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
4864349cc55cSDimitry Andricdefm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
4865349cc55cSDimitry Andricdefm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
4866349cc55cSDimitry Andric
4867349cc55cSDimitry Andricdefm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
4868349cc55cSDimitry Andricdefm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
4869349cc55cSDimitry Andricdefm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
4870349cc55cSDimitry Andric
// Base record for single-component 2D-array surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the stored value $r.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs values $idx, $x and $y, then $r; printed as:
//   inst [$s, {$idx, $x, $y, $y}], {$r};
// $y is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .a2d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $y.
4871349cc55cSDimitry Andricclass SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
48720b57cec5SDimitry Andric    : NVPTXInst<(outs),
4873349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4874349cc55cSDimitry Andric                                intype:$r)),
4875349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
48760b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a single-component 2D-array
// store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_ARRAY_base.
4877349cc55cSDimitry Andricmulticlass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
4878349cc55cSDimitry Andric  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
4879349cc55cSDimitry Andric  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
4880349cc55cSDimitry Andric}
4881349cc55cSDimitry Andric
// Single-component 2D-array stores. Each defm expands to <name>_R and
// <name>_I via SUST_2D_ARRAY. b8 and b16 elements both use Int16Regs, b32 uses
// Int32Regs and b64 uses Int64Regs. sust.b is provided in .clamp, .trap and
// .zero forms for all four widths; sust.p only in .trap and without a b64
// entry.
4882349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B8_CLAMP
4883349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
4884349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B16_CLAMP
4885349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
4886349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B32_CLAMP
4887349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
4888349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B64_CLAMP
4889349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
4890349cc55cSDimitry Andric
4891349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B8_TRAP
4892349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
4893349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B16_TRAP
4894349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
4895349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B32_TRAP
4896349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
4897349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B64_TRAP
4898349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
4899349cc55cSDimitry Andric
4900349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B8_ZERO
4901349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
4902349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B16_ZERO
4903349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
4904349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B32_ZERO
4905349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
4906349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_B64_ZERO
4907349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
4908349cc55cSDimitry Andric
4909349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_B8_TRAP
4910349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
4911349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_B16_TRAP
4912349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
4913349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_B32_TRAP
4914349cc55cSDimitry Andric  : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
4915349cc55cSDimitry Andric
// Base record for two-component 2D-array surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the two stored values $r and $g.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs values $idx, $x and $y, then $r and $g; printed as:
//   inst [$s, {$idx, $x, $y, $y}], {$r, $g};
// $y is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .a2d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $y.
4916349cc55cSDimitry Andricclass SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
49170b57cec5SDimitry Andric    : NVPTXInst<(outs),
4918349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4919349cc55cSDimitry Andric                                intype:$r, intype:$g)),
4920349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
49210b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a two-component 2D-array store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_ARRAY_V2_base.
4922349cc55cSDimitry Andricmulticlass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
4923349cc55cSDimitry Andric  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
4924349cc55cSDimitry Andric  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
4925349cc55cSDimitry Andric}
4926349cc55cSDimitry Andric
// Two-component (.v2) 2D-array stores. Each defm expands to <name>_R and
// <name>_I via SUST_2D_ARRAY_V2. b8 and b16 elements both use Int16Regs, b32
// uses Int32Regs and b64 uses Int64Regs. sust.b is provided in .clamp, .trap
// and .zero forms for all four widths; sust.p only in .trap and without a b64
// entry.
4927349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B8_CLAMP
4928349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
4929349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B16_CLAMP
4930349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
4931349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B32_CLAMP
4932349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
4933349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B64_CLAMP
4934349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
4935349cc55cSDimitry Andric
4936349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B8_TRAP
4937349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
4938349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B16_TRAP
4939349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
4940349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B32_TRAP
4941349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
4942349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B64_TRAP
4943349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
4944349cc55cSDimitry Andric
4945349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B8_ZERO
4946349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
4947349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B16_ZERO
4948349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
4949349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B32_ZERO
4950349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
4951349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V2B64_ZERO
4952349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
4953349cc55cSDimitry Andric
4954349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V2B8_TRAP
4955349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
4956349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V2B16_TRAP
4957349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
4958349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V2B32_TRAP
4959349cc55cSDimitry Andric  : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
4960349cc55cSDimitry Andric
// Base record for four-component 2D-array surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the four stored values $r, $g, $b, $a.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs values $idx, $x and $y, then the four values; printed as:
//   inst [$s, {$idx, $x, $y, $y}], {$r, $g, $b, $a};
// $y is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .a2d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $y.
4961349cc55cSDimitry Andricclass SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
49620b57cec5SDimitry Andric    : NVPTXInst<(outs),
4963349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
4964349cc55cSDimitry Andric                                intype:$r, intype:$g, intype:$b, intype:$a)),
4965349cc55cSDimitry Andric                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
49660b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a four-component 2D-array store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_2D_ARRAY_V4_base.
4967349cc55cSDimitry Andricmulticlass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
4968349cc55cSDimitry Andric  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
4969349cc55cSDimitry Andric  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
4970349cc55cSDimitry Andric}
4971349cc55cSDimitry Andric
// Four-component (.v4) 2D-array stores. Each defm expands to <name>_R and
// <name>_I via SUST_2D_ARRAY_V4. b8 and b16 elements both use Int16Regs, b32
// uses Int32Regs; no b64 variants are instantiated. sust.b is provided in
// .clamp, .trap and .zero forms, sust.p only in .trap.
4972349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B8_CLAMP
4973349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
4974349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B16_CLAMP
4975349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
4976349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B32_CLAMP
4977349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
4978349cc55cSDimitry Andric
4979349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B8_TRAP
4980349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
4981349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B16_TRAP
4982349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
4983349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B32_TRAP
4984349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
4985349cc55cSDimitry Andric
4986349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B8_ZERO
4987349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
4988349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B16_ZERO
4989349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
4990349cc55cSDimitry Andricdefm SUST_B_2D_ARRAY_V4B32_ZERO
4991349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
4992349cc55cSDimitry Andric
4993349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V4B8_TRAP
4994349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
4995349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V4B16_TRAP
4996349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
4997349cc55cSDimitry Andricdefm SUST_P_2D_ARRAY_V4B32_TRAP
4998349cc55cSDimitry Andric  : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
4999349cc55cSDimitry Andric
// Base record for single-component 3D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the stored value $r.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x, $y and $z, then $r; printed as:
//   inst [$s, {$x, $y, $z, $z}], {$r};
// $z is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .3d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $z.
5000349cc55cSDimitry Andricclass SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
50010b57cec5SDimitry Andric    : NVPTXInst<(outs),
5002349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5003349cc55cSDimitry Andric                                intype:$r)),
5004349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
50050b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a single-component 3D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_3D_base.
5006349cc55cSDimitry Andricmulticlass SUST_3D<string inst, NVPTXRegClass intype> {
5007349cc55cSDimitry Andric  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
5008349cc55cSDimitry Andric  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
5009349cc55cSDimitry Andric}
5010349cc55cSDimitry Andric
// Single-component 3D stores. Each defm expands to <name>_R and <name>_I via
// SUST_3D. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs and b64
// uses Int64Regs. sust.b is provided in .clamp, .trap and .zero forms for all
// four widths; sust.p only in .trap and without a b64 entry.
5011349cc55cSDimitry Andricdefm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
5012349cc55cSDimitry Andricdefm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
5013349cc55cSDimitry Andricdefm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
5014349cc55cSDimitry Andricdefm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
5015349cc55cSDimitry Andric
5016349cc55cSDimitry Andricdefm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
5017349cc55cSDimitry Andricdefm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
5018349cc55cSDimitry Andricdefm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
5019349cc55cSDimitry Andricdefm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
5020349cc55cSDimitry Andric
5021349cc55cSDimitry Andricdefm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
5022349cc55cSDimitry Andricdefm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
5023349cc55cSDimitry Andricdefm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
5024349cc55cSDimitry Andricdefm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
5025349cc55cSDimitry Andric
5026349cc55cSDimitry Andricdefm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
5027349cc55cSDimitry Andricdefm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
5028349cc55cSDimitry Andricdefm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
5029349cc55cSDimitry Andric
// Base record for two-component 3D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the two stored values $r and $g.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x, $y and $z, then $r and $g; printed as:
//   inst [$s, {$x, $y, $z, $z}], {$r, $g};
// $z is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .3d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $z.
5030349cc55cSDimitry Andricclass SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
50310b57cec5SDimitry Andric    : NVPTXInst<(outs),
5032349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5033349cc55cSDimitry Andric                                intype:$r, intype:$g)),
5034349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
50350b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a two-component 3D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_3D_V2_base.
5036349cc55cSDimitry Andricmulticlass SUST_3D_V2<string inst, NVPTXRegClass intype> {
5037349cc55cSDimitry Andric  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
5038349cc55cSDimitry Andric  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
5039349cc55cSDimitry Andric}
5040349cc55cSDimitry Andric
// Two-component (.v2) 3D stores. Each defm expands to <name>_R and <name>_I
// via SUST_3D_V2. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs
// and b64 uses Int64Regs. sust.b is provided in .clamp, .trap and .zero forms
// for all four widths; sust.p only in .trap and without a b64 entry.
5041349cc55cSDimitry Andricdefm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
5042349cc55cSDimitry Andricdefm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
5043349cc55cSDimitry Andricdefm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
5044349cc55cSDimitry Andricdefm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
5045349cc55cSDimitry Andric
5046349cc55cSDimitry Andricdefm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
5047349cc55cSDimitry Andricdefm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
5048349cc55cSDimitry Andricdefm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
5049349cc55cSDimitry Andricdefm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
5050349cc55cSDimitry Andric
5051349cc55cSDimitry Andricdefm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
5052349cc55cSDimitry Andricdefm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
5053349cc55cSDimitry Andricdefm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
5054349cc55cSDimitry Andricdefm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
5055349cc55cSDimitry Andric
5056349cc55cSDimitry Andricdefm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
5057349cc55cSDimitry Andricdefm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
5058349cc55cSDimitry Andricdefm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
5059349cc55cSDimitry Andric
// Base record for four-component 3D surface stores.
//   inst   - mnemonic text placed at the start of the asm string.
//   intype - register class of the four stored values $r, $g, $b, $a.
//   surf   - leading (ins ...) dag; the asm string refers to its operand as
//            $s, so it is expected to name exactly that operand.
// No outputs, empty selection-pattern list. Operands are `surf`, then the
// Int32Regs coordinates $x, $y and $z, then the four values; printed as:
//   inst [$s, {$x, $y, $z, $z}], {$r, $g, $b, $a};
// $z is printed twice, so the coordinate vector has four elements although
// only three coordinate operands exist.
// NOTE(review): presumably PTX requires a four-element coordinate vector for
// .3d and ignores the last element - confirm against the PTX ISA before
// "fixing" the repeated $z.
5060349cc55cSDimitry Andricclass SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
50610b57cec5SDimitry Andric    : NVPTXInst<(outs),
5062349cc55cSDimitry Andric                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
5063349cc55cSDimitry Andric                                intype:$r, intype:$g, intype:$b, intype:$a)),
5064349cc55cSDimitry Andric                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
50650b57cec5SDimitry Andric                []>;
// Defines the two surface-operand flavours of a four-component 3D store:
//   _R : $s is an Int64Regs register operand.
//   _I : $s is an i64imm immediate operand.
// `inst` and `intype` are forwarded unchanged to SUST_3D_V4_base.
5066349cc55cSDimitry Andricmulticlass SUST_3D_V4<string inst, NVPTXRegClass intype> {
5067349cc55cSDimitry Andric  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
5068349cc55cSDimitry Andric  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
5069349cc55cSDimitry Andric}
5070349cc55cSDimitry Andric
// Four-component (.v4) 3D stores. Each defm expands to <name>_R and <name>_I
// via SUST_3D_V4. b8 and b16 elements both use Int16Regs, b32 uses Int32Regs;
// no b64 variants are instantiated. sust.b is provided in .clamp, .trap and
// .zero forms, sust.p only in .trap.
5071349cc55cSDimitry Andricdefm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
5072349cc55cSDimitry Andricdefm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
5073349cc55cSDimitry Andricdefm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
5074349cc55cSDimitry Andric
5075349cc55cSDimitry Andricdefm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
5076349cc55cSDimitry Andricdefm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
5077349cc55cSDimitry Andricdefm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
5078349cc55cSDimitry Andric
5079349cc55cSDimitry Andricdefm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
5080349cc55cSDimitry Andricdefm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
5081349cc55cSDimitry Andricdefm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
5082349cc55cSDimitry Andric
5083349cc55cSDimitry Andricdefm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
5084349cc55cSDimitry Andricdefm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
5085349cc55cSDimitry Andricdefm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
5086349cc55cSDimitry Andric
50870b57cec5SDimitry Andric}
50880b57cec5SDimitry Andric
50890b57cec5SDimitry Andric// Surface store instruction patterns
50900b57cec5SDimitry Andric// I'm not sure why we can't just include these in the instruction definitions,
50910b57cec5SDimitry Andric// but TableGen complains of type errors :(
50920b57cec5SDimitry Andric
50930b57cec5SDimitry Andric// .clamp variant
// 1D sust.b .clamp selection patterns. Each int_nvvm_sust_b_1d_*_clamp
// intrinsic is mapped to the _R (surface operand in Int64Regs) form of the
// matching SUST_B_1D_*_CLAMP instruction, with operands passed through in the
// same order: $s, $x, then the stored value(s). i8 and i16 values are matched
// in Int16Regs, i32 in Int32Regs, i64 in Int64Regs; the v4 forms stop at i32.
50940b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i8_clamp
50950b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5096349cc55cSDimitry Andric          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
50970b57cec5SDimitry Andric
50980b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i16_clamp
50990b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5100349cc55cSDimitry Andric          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
51010b57cec5SDimitry Andric
51020b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i32_clamp
51030b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5104349cc55cSDimitry Andric          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
51050b57cec5SDimitry Andric
51060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i64_clamp
51070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5108349cc55cSDimitry Andric          (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
51090b57cec5SDimitry Andric
51100b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
51110b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5112349cc55cSDimitry Andric          (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51130b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
51140b57cec5SDimitry Andric
51150b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
51160b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5117349cc55cSDimitry Andric          (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51180b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
51190b57cec5SDimitry Andric
51200b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
51210b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5122349cc55cSDimitry Andric          (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51230b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
51240b57cec5SDimitry Andric
51250b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
51260b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5127349cc55cSDimitry Andric          (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51280b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
51290b57cec5SDimitry Andric
51300b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
51310b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
51320b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5133349cc55cSDimitry Andric          (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51340b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
51350b57cec5SDimitry Andric
51360b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
51370b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
51380b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5139349cc55cSDimitry Andric          (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51400b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
51410b57cec5SDimitry Andric
51420b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
51430b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
51440b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5145349cc55cSDimitry Andric          (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
51460b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
51470b57cec5SDimitry Andric
51480b57cec5SDimitry Andric
51490b57cec5SDimitry Andric
// 1D-array sust.b .clamp selection patterns. Each
// int_nvvm_sust_b_1d_array_*_clamp intrinsic is mapped to the _R (surface
// operand in Int64Regs) form of the matching SUST_B_1D_ARRAY_*_CLAMP
// instruction, with operands passed through in the same order: $s, $l, $x,
// then the stored value(s). The patterns name the second operand $l; it fills
// the slot the V4 instruction definition calls $idx.
// i8 and i16 values are matched in Int16Regs, i32 in Int32Regs, i64 in
// Int64Regs; the v4 forms stop at i32.
51500b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
51510b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5152349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51530b57cec5SDimitry Andric           Int16Regs:$r)>;
51540b57cec5SDimitry Andric
51550b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
51560b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5157349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51580b57cec5SDimitry Andric           Int16Regs:$r)>;
51590b57cec5SDimitry Andric
51600b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
51610b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5162349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51630b57cec5SDimitry Andric           Int32Regs:$r)>;
51640b57cec5SDimitry Andric
51650b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
51660b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5167349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51680b57cec5SDimitry Andric           Int64Regs:$r)>;
51690b57cec5SDimitry Andric
51700b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
51710b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5172349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51730b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
51740b57cec5SDimitry Andric
51750b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
51760b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5177349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51780b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
51790b57cec5SDimitry Andric
51800b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
51810b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5182349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51830b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
51840b57cec5SDimitry Andric
51850b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
51860b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5187349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51880b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
51890b57cec5SDimitry Andric
51900b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
51910b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51920b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5193349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51940b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
51950b57cec5SDimitry Andric
51960b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
51970b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
51980b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5199349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
52000b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
52010b57cec5SDimitry Andric
52020b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
52030b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
52040b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5205349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
52060b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
52070b57cec5SDimitry Andric
52080b57cec5SDimitry Andric
52090b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_2d_*_clamp intrinsics: scalar
// (i8/i16/i32/i64), two-element (v2*) and four-element (v4i8/v4i16/v4i32)
// forms. Each Pat rewrites the intrinsic into the SUST_B_2D_*_CLAMP_R
// instruction of the same shape, passing every operand through unchanged and
// in the same order: $s (Int64Regs), $x and $y (Int32Regs), then the data
// values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_* instruction definitions.
52100b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i8_clamp
52110b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5212349cc55cSDimitry Andric          (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52130b57cec5SDimitry Andric           Int16Regs:$r)>;
52140b57cec5SDimitry Andric
52150b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i16_clamp
52160b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5217349cc55cSDimitry Andric          (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52180b57cec5SDimitry Andric           Int16Regs:$r)>;
52190b57cec5SDimitry Andric
52200b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i32_clamp
52210b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5222349cc55cSDimitry Andric          (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52230b57cec5SDimitry Andric           Int32Regs:$r)>;
52240b57cec5SDimitry Andric
52250b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i64_clamp
52260b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5227349cc55cSDimitry Andric          (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52280b57cec5SDimitry Andric           Int64Regs:$r)>;
52290b57cec5SDimitry Andric
52300b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
52310b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5232349cc55cSDimitry Andric          (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52330b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
52340b57cec5SDimitry Andric
52350b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
52360b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5237349cc55cSDimitry Andric          (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52380b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
52390b57cec5SDimitry Andric
52400b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
52410b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5242349cc55cSDimitry Andric          (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52430b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
52440b57cec5SDimitry Andric
52450b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
52460b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5247349cc55cSDimitry Andric          (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52480b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
52490b57cec5SDimitry Andric
52500b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
52510b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52520b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5253349cc55cSDimitry Andric          (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52540b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
52550b57cec5SDimitry Andric
52560b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
52570b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52580b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5259349cc55cSDimitry Andric          (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52600b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
52610b57cec5SDimitry Andric
52620b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
52630b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52640b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5265349cc55cSDimitry Andric          (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
52660b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
52670b57cec5SDimitry Andric
52680b57cec5SDimitry Andric
52690b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_2d_array_*_clamp intrinsics:
// scalar (i8/i16/i32/i64), two-element (v2*) and four-element
// (v4i8/v4i16/v4i32) forms. Each Pat rewrites the intrinsic into the
// SUST_B_2D_ARRAY_*_CLAMP_R instruction of the same shape, passing every
// operand through unchanged and in the same order: $s (Int64Regs), $l, $x
// and $y (Int32Regs), then the data values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle, $l the array layer index
// and $x/$y the coordinates -- confirm against the SUST_* instruction
// definitions.
52700b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
52710b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5272349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
52730b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
52740b57cec5SDimitry Andric           Int16Regs:$r)>;
52750b57cec5SDimitry Andric
52760b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
52770b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5278349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
52790b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
52800b57cec5SDimitry Andric           Int16Regs:$r)>;
52810b57cec5SDimitry Andric
52820b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
52830b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5284349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
52850b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
52860b57cec5SDimitry Andric           Int32Regs:$r)>;
52870b57cec5SDimitry Andric
52880b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
52890b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5290349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
52910b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
52920b57cec5SDimitry Andric           Int64Regs:$r)>;
52930b57cec5SDimitry Andric
52940b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
52950b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
52960b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5297349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
52980b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
52990b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
53000b57cec5SDimitry Andric
53010b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
53020b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53030b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5304349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
53050b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
53060b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
53070b57cec5SDimitry Andric
53080b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
53090b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
53100b57cec5SDimitry Andric           Int32Regs:$g),
5311349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
53120b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
53130b57cec5SDimitry Andric
53140b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
53150b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
53160b57cec5SDimitry Andric           Int64Regs:$g),
5317349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
53180b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
53190b57cec5SDimitry Andric
53200b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
53210b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53220b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5323349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
53240b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53250b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
53260b57cec5SDimitry Andric
53270b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
53280b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53290b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5330349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
53310b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53320b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
53330b57cec5SDimitry Andric
53340b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
53350b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
53360b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5337349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
53380b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
53390b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
53400b57cec5SDimitry Andric
53410b57cec5SDimitry Andric
53420b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_3d_*_clamp intrinsics: scalar
// (i8/i16/i32/i64), two-element (v2*) and four-element (v4i8/v4i16/v4i32)
// forms. Each Pat rewrites the intrinsic into the SUST_B_3D_*_CLAMP_R
// instruction of the same shape, passing every operand through unchanged and
// in the same order: $s (Int64Regs), $x, $y and $z (Int32Regs), then the
// data values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_* instruction definitions.
53430b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i8_clamp
53440b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53450b57cec5SDimitry Andric           Int16Regs:$r),
5346349cc55cSDimitry Andric          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
53470b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53480b57cec5SDimitry Andric           Int16Regs:$r)>;
53490b57cec5SDimitry Andric
53500b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i16_clamp
53510b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53520b57cec5SDimitry Andric           Int16Regs:$r),
5353349cc55cSDimitry Andric          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
53540b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53550b57cec5SDimitry Andric           Int16Regs:$r)>;
53560b57cec5SDimitry Andric
53570b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i32_clamp
53580b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53590b57cec5SDimitry Andric           Int32Regs:$r),
5360349cc55cSDimitry Andric          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
53610b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53620b57cec5SDimitry Andric           Int32Regs:$r)>;
53630b57cec5SDimitry Andric
53640b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i64_clamp
53650b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53660b57cec5SDimitry Andric           Int64Regs:$r),
5367349cc55cSDimitry Andric          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
53680b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53690b57cec5SDimitry Andric           Int64Regs:$r)>;
53700b57cec5SDimitry Andric
53710b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
53720b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53730b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5374349cc55cSDimitry Andric          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
53750b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53760b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
53770b57cec5SDimitry Andric
53780b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
53790b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53800b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5381349cc55cSDimitry Andric          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
53820b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53830b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
53840b57cec5SDimitry Andric
53850b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
53860b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53870b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g),
5388349cc55cSDimitry Andric          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
53890b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53900b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
53910b57cec5SDimitry Andric
53920b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
53930b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53940b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g),
5395349cc55cSDimitry Andric          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
53960b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
53970b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
53980b57cec5SDimitry Andric
53990b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
54000b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54010b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5402349cc55cSDimitry Andric          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
54030b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54040b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
54050b57cec5SDimitry Andric
54060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
54070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54080b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5409349cc55cSDimitry Andric          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
54100b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54110b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
54120b57cec5SDimitry Andric
54130b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
54140b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54150b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5416349cc55cSDimitry Andric          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
54170b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
54180b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
54190b57cec5SDimitry Andric
54200b57cec5SDimitry Andric
54210b57cec5SDimitry Andric// .trap variant
// Selection patterns for the int_nvvm_sust_b_1d_*_trap intrinsics: scalar
// (i8/i16/i32/i64), two-element (v2*) and four-element (v4i8/v4i16/v4i32)
// forms. Each Pat rewrites the intrinsic into the SUST_B_1D_*_TRAP_R
// instruction of the same shape, passing every operand through unchanged and
// in the same order: $s (Int64Regs), $x (Int32Regs), then the data values
// $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle and $x the coordinate --
// confirm against the SUST_* instruction definitions.
54220b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i8_trap
54230b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5424349cc55cSDimitry Andric          (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
54250b57cec5SDimitry Andric
54260b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i16_trap
54270b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5428349cc55cSDimitry Andric          (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
54290b57cec5SDimitry Andric
54300b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i32_trap
54310b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5432349cc55cSDimitry Andric          (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
54330b57cec5SDimitry Andric
54340b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i64_trap
54350b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5436349cc55cSDimitry Andric          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
54370b57cec5SDimitry Andric
54380b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i8_trap
54390b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5440349cc55cSDimitry Andric          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
54410b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
54420b57cec5SDimitry Andric
54430b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i16_trap
54440b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5445349cc55cSDimitry Andric          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
54460b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
54470b57cec5SDimitry Andric
54480b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i32_trap
54490b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5450349cc55cSDimitry Andric          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
54510b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
54520b57cec5SDimitry Andric
54530b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i64_trap
54540b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5455349cc55cSDimitry Andric          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
54560b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
54570b57cec5SDimitry Andric
54580b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i8_trap
54590b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
54600b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5461349cc55cSDimitry Andric          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
54620b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
54630b57cec5SDimitry Andric
54640b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i16_trap
54650b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
54660b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5467349cc55cSDimitry Andric          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
54680b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
54690b57cec5SDimitry Andric
54700b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i32_trap
54710b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
54720b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5473349cc55cSDimitry Andric          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
54740b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
54750b57cec5SDimitry Andric
54760b57cec5SDimitry Andric
54770b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_1d_array_*_trap intrinsics:
// scalar (i8/i16/i32/i64), two-element (v2*) and four-element
// (v4i8/v4i16/v4i32) forms. Each Pat rewrites the intrinsic into the
// SUST_B_1D_ARRAY_*_TRAP_R instruction of the same shape, passing every
// operand through unchanged and in the same order: $s (Int64Regs), $l and $x
// (Int32Regs), then the data values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle, $l the array layer index
// and $x the coordinate -- confirm against the SUST_* instruction
// definitions.
54780b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i8_trap
54790b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5480349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
54810b57cec5SDimitry Andric           Int16Regs:$r)>;
54820b57cec5SDimitry Andric
54830b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i16_trap
54840b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5485349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
54860b57cec5SDimitry Andric           Int16Regs:$r)>;
54870b57cec5SDimitry Andric
54880b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i32_trap
54890b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5490349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
54910b57cec5SDimitry Andric           Int32Regs:$r)>;
54920b57cec5SDimitry Andric
54930b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i64_trap
54940b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5495349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
54960b57cec5SDimitry Andric           Int64Regs:$r)>;
54970b57cec5SDimitry Andric
54980b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
54990b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5500349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55010b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
55020b57cec5SDimitry Andric
55030b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
55040b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5505349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55060b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
55070b57cec5SDimitry Andric
55080b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
55090b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5510349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55110b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
55120b57cec5SDimitry Andric
55130b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
55140b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5515349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55160b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
55170b57cec5SDimitry Andric
55180b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
55190b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55200b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5521349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55220b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
55230b57cec5SDimitry Andric
55240b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
55250b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55260b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5527349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55280b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
55290b57cec5SDimitry Andric
55300b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
55310b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55320b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5533349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
55340b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
55350b57cec5SDimitry Andric
55360b57cec5SDimitry Andric
55370b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_2d_*_trap intrinsics: scalar
// (i8/i16/i32/i64), two-element (v2*) and four-element (v4i8/v4i16/v4i32)
// forms. Each Pat rewrites the intrinsic into the SUST_B_2D_*_TRAP_R
// instruction of the same shape, passing every operand through unchanged and
// in the same order: $s (Int64Regs), $x and $y (Int32Regs), then the data
// values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle and $x/$y the
// coordinates -- confirm against the SUST_* instruction definitions.
55380b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i8_trap
55390b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5540349cc55cSDimitry Andric          (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55410b57cec5SDimitry Andric           Int16Regs:$r)>;
55420b57cec5SDimitry Andric
55430b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i16_trap
55440b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5545349cc55cSDimitry Andric          (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55460b57cec5SDimitry Andric           Int16Regs:$r)>;
55470b57cec5SDimitry Andric
55480b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i32_trap
55490b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5550349cc55cSDimitry Andric          (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55510b57cec5SDimitry Andric           Int32Regs:$r)>;
55520b57cec5SDimitry Andric
55530b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i64_trap
55540b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5555349cc55cSDimitry Andric          (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55560b57cec5SDimitry Andric           Int64Regs:$r)>;
55570b57cec5SDimitry Andric
55580b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i8_trap
55590b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5560349cc55cSDimitry Andric          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55610b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
55620b57cec5SDimitry Andric
55630b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i16_trap
55640b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5565349cc55cSDimitry Andric          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55660b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
55670b57cec5SDimitry Andric
55680b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i32_trap
55690b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5570349cc55cSDimitry Andric          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55710b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
55720b57cec5SDimitry Andric
55730b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i64_trap
55740b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5575349cc55cSDimitry Andric          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55760b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
55770b57cec5SDimitry Andric
55780b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i8_trap
55790b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55800b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5581349cc55cSDimitry Andric          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55820b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
55830b57cec5SDimitry Andric
55840b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i16_trap
55850b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55860b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5587349cc55cSDimitry Andric          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55880b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
55890b57cec5SDimitry Andric
55900b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i32_trap
55910b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55920b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5593349cc55cSDimitry Andric          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
55940b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
55950b57cec5SDimitry Andric
55960b57cec5SDimitry Andric
55970b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_b_2d_array_*_trap intrinsics:
// scalar (i8/i16/i32/i64), two-element (v2*) and four-element
// (v4i8/v4i16/v4i32) forms. Each Pat rewrites the intrinsic into the
// SUST_B_2D_ARRAY_*_TRAP_R instruction of the same shape, passing every
// operand through unchanged and in the same order: $s (Int64Regs), $l, $x
// and $y (Int32Regs), then the data values $r[, $g[, $b, $a]].
// 8-bit and 16-bit data values are both matched in Int16Regs; 32-bit values
// use Int32Regs and 64-bit values use Int64Regs. There is no v4i64 form here.
// NOTE(review): $s is presumably the surface handle, $l the array layer index
// and $x/$y the coordinates -- confirm against the SUST_* instruction
// definitions.
55980b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i8_trap
55990b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5600349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
56010b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56020b57cec5SDimitry Andric           Int16Regs:$r)>;
56030b57cec5SDimitry Andric
56040b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i16_trap
56050b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5606349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
56070b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56080b57cec5SDimitry Andric           Int16Regs:$r)>;
56090b57cec5SDimitry Andric
56100b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i32_trap
56110b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5612349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
56130b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56140b57cec5SDimitry Andric           Int32Regs:$r)>;
56150b57cec5SDimitry Andric
56160b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i64_trap
56170b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5618349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
56190b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56200b57cec5SDimitry Andric           Int64Regs:$r)>;
56210b57cec5SDimitry Andric
56220b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
56230b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56240b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5625349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
56260b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
56270b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
56280b57cec5SDimitry Andric
56290b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
56300b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56310b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5632349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
56330b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
56340b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
56350b57cec5SDimitry Andric
56360b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
56370b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
56380b57cec5SDimitry Andric           Int32Regs:$g),
5639349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
56400b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
56410b57cec5SDimitry Andric
56420b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
56430b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
56440b57cec5SDimitry Andric           Int64Regs:$g),
5645349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
56460b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
56470b57cec5SDimitry Andric
56480b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
56490b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56500b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5651349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
56520b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56530b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
56540b57cec5SDimitry Andric
56550b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
56560b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56570b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5658349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
56590b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56600b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
56610b57cec5SDimitry Andric
56620b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
56630b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
56640b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5665349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
56660b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
56670b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
56680b57cec5SDimitry Andric
56690b57cec5SDimitry Andric
56700b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_b_3d_i*_trap
// intrinsics: each is matched to the SUST_B_3D_B*_TRAP_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The i8 and i16 forms both take their data in Int16Regs; i32 uses
// Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r the stored value -- confirm against the SUST_* instruction
// definitions.
56710b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i8_trap
56720b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56730b57cec5SDimitry Andric           Int16Regs:$r),
5674349cc55cSDimitry Andric          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
56750b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56760b57cec5SDimitry Andric           Int16Regs:$r)>;
56770b57cec5SDimitry Andric
56780b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i16_trap
56790b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56800b57cec5SDimitry Andric           Int16Regs:$r),
5681349cc55cSDimitry Andric          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
56820b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56830b57cec5SDimitry Andric           Int16Regs:$r)>;
56840b57cec5SDimitry Andric
56850b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i32_trap
56860b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56870b57cec5SDimitry Andric           Int32Regs:$r),
5688349cc55cSDimitry Andric          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
56890b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56900b57cec5SDimitry Andric           Int32Regs:$r)>;
56910b57cec5SDimitry Andric
56920b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i64_trap
56930b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56940b57cec5SDimitry Andric           Int64Regs:$r),
5695349cc55cSDimitry Andric          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
56960b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
56970b57cec5SDimitry Andric           Int64Regs:$r)>;
56980b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_3d_v2i*_trap
// intrinsics: each is matched to the SUST_B_3D_V2B*_TRAP_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v2i8 and v2i16 forms both take their data in Int16Regs; v2i32
// uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r/$g the stored components -- confirm against the SUST_* instruction
// definitions.
56990b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i8_trap
57000b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57010b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5702349cc55cSDimitry Andric          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
57030b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57040b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
57050b57cec5SDimitry Andric
57060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i16_trap
57070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57080b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5709349cc55cSDimitry Andric          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
57100b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57110b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
57120b57cec5SDimitry Andric
57130b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i32_trap
57140b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57150b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g),
5716349cc55cSDimitry Andric          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
57170b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57180b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
57190b57cec5SDimitry Andric
57200b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i64_trap
57210b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57220b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g),
5723349cc55cSDimitry Andric          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
57240b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57250b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
57260b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_3d_v4i*_trap
// intrinsics: each is matched to the SUST_B_3D_V4B*_TRAP_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v4i8 and v4i16 forms both take their data in Int16Regs; v4i32
// uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r/$g/$b/$a the stored components -- confirm against the SUST_*
// instruction definitions.
57270b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i8_trap
57280b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57290b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5730349cc55cSDimitry Andric          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
57310b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57320b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
57330b57cec5SDimitry Andric
57340b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i16_trap
57350b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57360b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5737349cc55cSDimitry Andric          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
57380b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57390b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
57400b57cec5SDimitry Andric
57410b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i32_trap
57420b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57430b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5744349cc55cSDimitry Andric          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
57450b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
57460b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
57470b57cec5SDimitry Andric
57480b57cec5SDimitry Andric
57490b57cec5SDimitry Andric// .zero variant
// Selection patterns for the single-component int_nvvm_sust_b_1d_i*_zero
// intrinsics: each is matched to the SUST_B_1D_B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The i8 and i16 forms both take their data in Int16Regs; i32 uses
// Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x the coordinate and $r
// the stored value -- confirm against the SUST_* instruction definitions.
57500b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i8_zero
57510b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5752349cc55cSDimitry Andric          (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
57530b57cec5SDimitry Andric
57540b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i16_zero
57550b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
5756349cc55cSDimitry Andric          (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
57570b57cec5SDimitry Andric
57580b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i32_zero
57590b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
5760349cc55cSDimitry Andric          (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
57610b57cec5SDimitry Andric
57620b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_i64_zero
57630b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
5764349cc55cSDimitry Andric          (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
57650b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_1d_v2i*_zero
// intrinsics: each is matched to the SUST_B_1D_V2B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v2i8 and v2i16 forms both take their data in Int16Regs; v2i32
// uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x the coordinate and
// $r/$g the stored components -- confirm against the SUST_* instruction
// definitions.
57660b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i8_zero
57670b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5768349cc55cSDimitry Andric          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
57690b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
57700b57cec5SDimitry Andric
57710b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i16_zero
57720b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5773349cc55cSDimitry Andric          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
57740b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
57750b57cec5SDimitry Andric
57760b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i32_zero
57770b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5778349cc55cSDimitry Andric          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
57790b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
57800b57cec5SDimitry Andric
57810b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v2i64_zero
57820b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5783349cc55cSDimitry Andric          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
57840b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
57850b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_1d_v4i*_zero
// intrinsics: each is matched to the SUST_B_1D_V4B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v4i8 and v4i16 forms both take their data in Int16Regs; v4i32
// uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x the coordinate and
// $r/$g/$b/$a the stored components -- confirm against the SUST_* instruction
// definitions.
57860b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i8_zero
57870b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
57880b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5789349cc55cSDimitry Andric          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
57900b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
57910b57cec5SDimitry Andric
57920b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i16_zero
57930b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
57940b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5795349cc55cSDimitry Andric          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
57960b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
57970b57cec5SDimitry Andric
57980b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_v4i32_zero
57990b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
58000b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5801349cc55cSDimitry Andric          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
58020b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
58030b57cec5SDimitry Andric
58040b57cec5SDimitry Andric
58050b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_b_1d_array_i*_zero
// intrinsics: each is matched to the SUST_B_1D_ARRAY_B*_ZERO_R instruction of
// the same element width, with every operand forwarded unchanged and in the
// same order. The i8 and i16 forms both take their data in Int16Regs; i32
// uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer, $x
// the coordinate and $r the stored value -- confirm against the SUST_*
// instruction definitions.
58060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i8_zero
58070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5808349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58090b57cec5SDimitry Andric           Int16Regs:$r)>;
58100b57cec5SDimitry Andric
58110b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i16_zero
58120b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
5813349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58140b57cec5SDimitry Andric           Int16Regs:$r)>;
58150b57cec5SDimitry Andric
58160b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i32_zero
58170b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
5818349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58190b57cec5SDimitry Andric           Int32Regs:$r)>;
58200b57cec5SDimitry Andric
58210b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_i64_zero
58220b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
5823349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58240b57cec5SDimitry Andric           Int64Regs:$r)>;
58250b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_1d_array_v2i*_zero
// intrinsics: each is matched to the SUST_B_1D_ARRAY_V2B*_ZERO_R instruction
// of the same element width, with every operand forwarded unchanged and in
// the same order. The v2i8 and v2i16 forms both take their data in Int16Regs;
// v2i32 uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer, $x
// the coordinate and $r/$g the stored components -- confirm against the
// SUST_* instruction definitions.
58260b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
58270b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5828349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58290b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
58300b57cec5SDimitry Andric
58310b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
58320b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
5833349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58340b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
58350b57cec5SDimitry Andric
58360b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
58370b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
5838349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58390b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
58400b57cec5SDimitry Andric
58410b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
58420b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
5843349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58440b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
58450b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_1d_array_v4i*_zero
// intrinsics: each is matched to the SUST_B_1D_ARRAY_V4B*_ZERO_R instruction
// of the same element width, with every operand forwarded unchanged and in
// the same order. The v4i8 and v4i16 forms both take their data in Int16Regs;
// v4i32 uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $l the array layer, $x
// the coordinate and $r/$g/$b/$a the stored components -- confirm against the
// SUST_* instruction definitions.
58460b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
58470b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58480b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5849349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58500b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
58510b57cec5SDimitry Andric
58520b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
58530b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58540b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5855349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58560b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
58570b57cec5SDimitry Andric
58580b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
58590b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58600b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5861349cc55cSDimitry Andric          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
58620b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
58630b57cec5SDimitry Andric
58640b57cec5SDimitry Andric
58650b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_b_2d_i*_zero
// intrinsics: each is matched to the SUST_B_2D_B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The i8 and i16 forms both take their data in Int16Regs; i32 uses
// Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y the coordinates
// and $r the stored value -- confirm against the SUST_* instruction
// definitions.
58660b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i8_zero
58670b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5868349cc55cSDimitry Andric          (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58690b57cec5SDimitry Andric           Int16Regs:$r)>;
58700b57cec5SDimitry Andric
58710b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i16_zero
58720b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5873349cc55cSDimitry Andric          (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58740b57cec5SDimitry Andric           Int16Regs:$r)>;
58750b57cec5SDimitry Andric
58760b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i32_zero
58770b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5878349cc55cSDimitry Andric          (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58790b57cec5SDimitry Andric           Int32Regs:$r)>;
58800b57cec5SDimitry Andric
58810b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_i64_zero
58820b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5883349cc55cSDimitry Andric          (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58840b57cec5SDimitry Andric           Int64Regs:$r)>;
58850b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_2d_v2i*_zero
// intrinsics: each is matched to the SUST_B_2D_V2B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v2i8 and v2i16 forms both take their data in Int16Regs; v2i32
// uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y the coordinates
// and $r/$g the stored components -- confirm against the SUST_* instruction
// definitions.
58860b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i8_zero
58870b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5888349cc55cSDimitry Andric          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58890b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
58900b57cec5SDimitry Andric
58910b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i16_zero
58920b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
5893349cc55cSDimitry Andric          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58940b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
58950b57cec5SDimitry Andric
58960b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i32_zero
58970b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
5898349cc55cSDimitry Andric          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
58990b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
59000b57cec5SDimitry Andric
59010b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v2i64_zero
59020b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
5903349cc55cSDimitry Andric          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59040b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
59050b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_2d_v4i*_zero
// intrinsics: each is matched to the SUST_B_2D_V4B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v4i8 and v4i16 forms both take their data in Int16Regs; v4i32
// uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x/$y the coordinates
// and $r/$g/$b/$a the stored components -- confirm against the SUST_*
// instruction definitions.
59060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i8_zero
59070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59080b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5909349cc55cSDimitry Andric          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59100b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
59110b57cec5SDimitry Andric
59120b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i16_zero
59130b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59140b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5915349cc55cSDimitry Andric          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59160b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
59170b57cec5SDimitry Andric
59180b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_v4i32_zero
59190b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59200b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5921349cc55cSDimitry Andric          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
59220b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
59230b57cec5SDimitry Andric
59240b57cec5SDimitry Andric
59250b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_b_2d_array_i*_zero
// intrinsics: each is matched to the SUST_B_2D_ARRAY_B*_ZERO_R instruction of
// the same element width, with every operand forwarded unchanged and in the
// same order. The i8 and i16 forms both take their data in Int16Regs; i32
// uses Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer,
// $x/$y the coordinates and $r the stored value -- confirm against the SUST_*
// instruction definitions.
59260b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i8_zero
59270b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5928349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
59290b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59300b57cec5SDimitry Andric           Int16Regs:$r)>;
59310b57cec5SDimitry Andric
59320b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i16_zero
59330b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
5934349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
59350b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59360b57cec5SDimitry Andric           Int16Regs:$r)>;
59370b57cec5SDimitry Andric
59380b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i32_zero
59390b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
5940349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
59410b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59420b57cec5SDimitry Andric           Int32Regs:$r)>;
59430b57cec5SDimitry Andric
59440b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_i64_zero
59450b57cec5SDimitry Andric          Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
5946349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
59470b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59480b57cec5SDimitry Andric           Int64Regs:$r)>;
59490b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_2d_array_v2i*_zero
// intrinsics: each is matched to the SUST_B_2D_ARRAY_V2B*_ZERO_R instruction
// of the same element width, with every operand forwarded unchanged and in
// the same order. The v2i8 and v2i16 forms both take their data in Int16Regs;
// v2i32 uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $l the array layer,
// $x/$y the coordinates and $r/$g the stored components -- confirm against
// the SUST_* instruction definitions.
59500b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
59510b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59520b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5953349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
59540b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
59550b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
59560b57cec5SDimitry Andric
59570b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
59580b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59590b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
5960349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
59610b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
59620b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
59630b57cec5SDimitry Andric
59640b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
59650b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
59660b57cec5SDimitry Andric           Int32Regs:$g),
5967349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
59680b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
59690b57cec5SDimitry Andric
59700b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
59710b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
59720b57cec5SDimitry Andric           Int64Regs:$g),
5973349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
59740b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
59750b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_2d_array_v4i*_zero
// intrinsics: each is matched to the SUST_B_2D_ARRAY_V4B*_ZERO_R instruction
// of the same element width, with every operand forwarded unchanged and in
// the same order. The v4i8 and v4i16 forms both take their data in Int16Regs;
// v4i32 uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $l the array layer,
// $x/$y the coordinates and $r/$g/$b/$a the stored components -- confirm
// against the SUST_* instruction definitions.
59760b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
59770b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59780b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5979349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
59800b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59810b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
59820b57cec5SDimitry Andric
59830b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
59840b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59850b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
5986349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
59870b57cec5SDimitry Andric           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59880b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
59890b57cec5SDimitry Andric
59900b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
59910b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
59920b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
5993349cc55cSDimitry Andric          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
59940b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y,
59950b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
59960b57cec5SDimitry Andric
59970b57cec5SDimitry Andric
59980b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_b_3d_i*_zero
// intrinsics: each is matched to the SUST_B_3D_B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The i8 and i16 forms both take their data in Int16Regs; i32 uses
// Int32Regs and i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r the stored value -- confirm against the SUST_* instruction
// definitions.
59990b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i8_zero
60000b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60010b57cec5SDimitry Andric           Int16Regs:$r),
6002349cc55cSDimitry Andric          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
60030b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60040b57cec5SDimitry Andric           Int16Regs:$r)>;
60050b57cec5SDimitry Andric
60060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i16_zero
60070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60080b57cec5SDimitry Andric           Int16Regs:$r),
6009349cc55cSDimitry Andric          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
60100b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60110b57cec5SDimitry Andric           Int16Regs:$r)>;
60120b57cec5SDimitry Andric
60130b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i32_zero
60140b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60150b57cec5SDimitry Andric           Int32Regs:$r),
6016349cc55cSDimitry Andric          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
60170b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60180b57cec5SDimitry Andric           Int32Regs:$r)>;
60190b57cec5SDimitry Andric
60200b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_i64_zero
60210b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60220b57cec5SDimitry Andric           Int64Regs:$r),
6023349cc55cSDimitry Andric          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
60240b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60250b57cec5SDimitry Andric           Int64Regs:$r)>;
60260b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_b_3d_v2i*_zero
// intrinsics: each is matched to the SUST_B_3D_V2B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v2i8 and v2i16 forms both take their data in Int16Regs; v2i32
// uses Int32Regs and v2i64 uses Int64Regs.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r/$g the stored components -- confirm against the SUST_* instruction
// definitions.
60270b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i8_zero
60280b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60290b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
6030349cc55cSDimitry Andric          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
60310b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60320b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
60330b57cec5SDimitry Andric
60340b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i16_zero
60350b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60360b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g),
6037349cc55cSDimitry Andric          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
60380b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60390b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
60400b57cec5SDimitry Andric
60410b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i32_zero
60420b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60430b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g),
6044349cc55cSDimitry Andric          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
60450b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60460b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
60470b57cec5SDimitry Andric
60480b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v2i64_zero
60490b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60500b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g),
6051349cc55cSDimitry Andric          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
60520b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60530b57cec5SDimitry Andric           Int64Regs:$r, Int64Regs:$g)>;
60540b57cec5SDimitry Andric
// Selection patterns for the 4-component int_nvvm_sust_b_3d_v4i*_zero
// intrinsics: each is matched to the SUST_B_3D_V4B*_ZERO_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v4i8 and v4i16 forms both take their data in Int16Regs; v4i32
// uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x/$y/$z the coordinates
// and $r/$g/$b/$a the stored components -- confirm against the SUST_*
// instruction definitions.
60550b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i8_zero
60560b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60570b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6058349cc55cSDimitry Andric          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
60590b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60600b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
60610b57cec5SDimitry Andric
60620b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i16_zero
60630b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60640b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6065349cc55cSDimitry Andric          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
60660b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60670b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
60680b57cec5SDimitry Andric
60690b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_b_3d_v4i32_zero
60700b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60710b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6072349cc55cSDimitry Andric          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
60730b57cec5SDimitry Andric           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
60740b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
60750b57cec5SDimitry Andric
60760b57cec5SDimitry Andric
60770b57cec5SDimitry Andric
60780b57cec5SDimitry Andric
// Selection patterns for the single-component int_nvvm_sust_p_1d_i*_trap
// intrinsics: each is matched to the SUST_P_1D_B*_TRAP_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The i8 and i16 forms both take their data in Int16Regs; i32 uses
// Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x the coordinate and $r
// the stored value -- confirm against the SUST_* instruction definitions.
60790b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_i8_trap
60800b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6081349cc55cSDimitry Andric          (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
60820b57cec5SDimitry Andric
60830b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_i16_trap
60840b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
6085349cc55cSDimitry Andric          (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
60860b57cec5SDimitry Andric
60870b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_i32_trap
60880b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
6089349cc55cSDimitry Andric          (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
60900b57cec5SDimitry Andric
// Selection patterns for the 2-component int_nvvm_sust_p_1d_v2i*_trap
// intrinsics: each is matched to the SUST_P_1D_V2B*_TRAP_R instruction of the
// same element width, with every operand forwarded unchanged and in the same
// order. The v2i8 and v2i16 forms both take their data in Int16Regs; v2i32
// uses Int32Regs. Only 8/16/32-bit forms appear in this group.
// NOTE(review): $s is presumably the surface handle, $x the coordinate and
// $r/$g the stored components -- confirm against the SUST_* instruction
// definitions.
60910b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v2i8_trap
60920b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6093349cc55cSDimitry Andric          (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
60940b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
60950b57cec5SDimitry Andric
60960b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v2i16_trap
60970b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
6098349cc55cSDimitry Andric          (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
60990b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g)>;
61000b57cec5SDimitry Andric
61010b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v2i32_trap
61020b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
6103349cc55cSDimitry Andric          (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
61040b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g)>;
61050b57cec5SDimitry Andric
61060b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v4i8_trap
61070b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
61080b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6109349cc55cSDimitry Andric          (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
61100b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
61110b57cec5SDimitry Andric
61120b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v4i16_trap
61130b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
61140b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
6115349cc55cSDimitry Andric          (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
61160b57cec5SDimitry Andric           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
61170b57cec5SDimitry Andric
61180b57cec5SDimitry Andricdef : Pat<(int_nvvm_sust_p_1d_v4i32_trap
61190b57cec5SDimitry Andric           Int64Regs:$s, Int32Regs:$x,
61200b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
6121349cc55cSDimitry Andric          (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
61220b57cec5SDimitry Andric           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
61230b57cec5SDimitry Andric
61240b57cec5SDimitry Andric
61250b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_p_1d_array_*_trap intrinsics.
// Each pattern rewrites the intrinsic into the correspondingly named
// SUST_P_1D_ARRAY_*_TRAP_R instruction and forwards the operands unchanged:
// $s (Int64Regs), $l and $x (Int32Regs), then the data operands.
// The i8 and i16 variants carry their data in Int16Regs; i32 uses Int32Regs.

// Single-element variants.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r)>;

// Two-element variants.
def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

// Four-element variants.
def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
61730b57cec5SDimitry Andric
61740b57cec5SDimitry Andric
61750b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_p_2d_*_trap intrinsics.
// Each pattern rewrites the intrinsic into the correspondingly named
// SUST_P_2D_*_TRAP_R instruction and forwards the operands unchanged:
// $s (Int64Regs), $x and $y (Int32Regs), then the data operands.
// The i8 and i16 variants carry their data in Int16Regs; i32 uses Int32Regs.

// Single-element variants.
def : Pat<(int_nvvm_sust_p_2d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

// Two-element variants.
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

// Four-element variants.
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
62230b57cec5SDimitry Andric
62240b57cec5SDimitry Andric
62250b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_p_2d_array_*_trap intrinsics.
// Each pattern rewrites the intrinsic into the correspondingly named
// SUST_P_2D_ARRAY_*_TRAP_R instruction and forwards the operands unchanged:
// $s (Int64Regs), $l, $x and $y (Int32Regs), then the data operands.
// The i8 and i16 variants carry their data in Int16Regs; i32 uses Int32Regs.

// Single-element variants.
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

// Two-element variants.
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

// Four-element variants.
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
62840b57cec5SDimitry Andric
62850b57cec5SDimitry Andric
62860b57cec5SDimitry Andric
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each pattern rewrites the intrinsic into the correspondingly named
// SUST_P_3D_*_TRAP_R instruction and forwards the operands unchanged:
// $s (Int64Regs), $x, $y and $z (Int32Regs), then the data operands.
// The i8 and i16 variants carry their data in Int16Regs; i32 uses Int32Regs.

// Single-element variants.
def : Pat<(int_nvvm_sust_p_3d_i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

// Two-element variants.
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g)>;

// Four-element variants.
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
63490b57cec5SDimitry Andric
63500b57cec5SDimitry Andric//-----------------------------------
63510b57cec5SDimitry Andric// Read Special Registers
63520b57cec5SDimitry Andric//-----------------------------------
63530b57cec5SDimitry Andric
// Instruction that copies a special register into a 64-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             asm string "mov.u64 \t$d, %<regname>;".
//   intop   - intrinsic selected to this instruction (it takes no operands).
//   Preds   - predicates passed to Requires<>; empty by default.
class PTX_READ_SREG_R64<string regname, Intrinsic intop,
                        list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              "mov.u64 \t$d, %" # regname # ";",
              [(set Int64Regs:$d, (intop))]>,
    Requires<Preds>;
63590b57cec5SDimitry Andric
// Instruction that copies a special register into a 32-bit register.
//   regname - register name without the leading '%'; it is spliced into the
//             asm string "mov.u32 \t$d, %<regname>;".
//   intop   - intrinsic selected to this instruction (it takes no operands).
//   Preds   - predicates passed to Requires<>; empty by default.
class PTX_READ_SREG_R32<string regname, Intrinsic intop,
                        list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              "mov.u32 \t$d, %" # regname # ";",
              [(set Int32Regs:$d, (intop))]>,
    Requires<Preds>;
636506c3fb27SDimitry Andric
// Defines four PTX_READ_SREG_R32 instructions, one per component
// (x, y, z, w) of the special register `regname`. For component c:
//   - the record is named with the suffix "_c",
//   - the register read is "<regname>.c",
//   - the intrinsic is looked up by name as
//     int_nvvm_read_ptx_sreg_<regname>_c.
// Preds is forwarded unchanged to every generated instruction.
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
  foreach comp = ["x", "y", "z", "w"] in
    def "_" # comp
        : PTX_READ_SREG_R32<
              !strconcat(regname, ".", comp),
              !cast<Intrinsic>(
                  !strconcat("int_nvvm_read_ptx_sreg_", regname, "_", comp)),
              Preds>;
}
63730b57cec5SDimitry Andric
// TODO: Add vector versions of the special-register reads.
63750b57cec5SDimitry Andric
// Per-component (_x/_y/_z/_w) reads of %tid, %ntid, %ctaid and %nctaid.
// No predicates are attached to these.
defm INT_PTX_SREG_TID
    : PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID
    : PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID
    : PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID
    : PTX_READ_SREG_R32V4<"nctaid">;
63800b57cec5SDimitry Andric
// Cluster-related special registers. Every definition in this group is
// guarded by the predicates hasSM<90> and hasPTX<78>.

// Per-component (_x/_y/_z/_w) reads.
defm INT_PTX_SREG_CLUSTERID
    : PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_NCLUSTERID
    : PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_CTAID
    : PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_NCTAID
    : PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;

// Scalar 32-bit reads.
def INT_PTX_SREG_CLUSTER_CTARANK
    : PTX_READ_SREG_R32<"cluster_ctarank",
                        int_nvvm_read_ptx_sreg_cluster_ctarank,
                        [hasSM<90>, hasPTX<78>]>;
def INT_PTX_SREG_CLUSTER_NCTARANK
    : PTX_READ_SREG_R32<"cluster_nctarank",
                        int_nvvm_read_ptx_sreg_cluster_nctarank,
                        [hasSM<90>, hasPTX<78>]>;
639806c3fb27SDimitry Andric
63990b57cec5SDimitry Andric
// Scalar 32-bit special-register reads with no predicates attached.
// Each def pairs a register name with the intrinsic that selects to it.
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt} reads.
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
64230b57cec5SDimitry Andric
// Clock and timer reads. All three are defined with hasSideEffects = 1.
// NOTE(review): presumably this keeps successive reads from being merged or
// reordered; confirm against the original change description.
let hasSideEffects = 1 in {
  def INT_PTX_SREG_CLOCK
      : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
  def INT_PTX_SREG_CLOCK64
      : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
  def INT_PTX_SREG_GLOBALTIMER
      : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
}

// The generic 64-bit counter nodes select to the reads defined above:
// readcyclecounter -> %clock64, readsteadycounter -> %globaltimer.
def : Pat<(i64 (readcyclecounter)),
          (INT_PTX_SREG_CLOCK64)>;
def : Pat<(i64 (readsteadycounter)),
          (INT_PTX_SREG_GLOBALTIMER)>;
64350b57cec5SDimitry Andric
// 32-bit reads of %pm0 .. %pm3, one def per register.
def INT_PTX_SREG_PM0
    : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1
    : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2
    : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3
    : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;
64400b57cec5SDimitry Andric
// Selects int_nvvm_read_ptx_sreg_warpsize to a move of the WARP_SZ constant.
// TODO: It would be nice to use PTX_READ_SREG here, but that class always
// emits "%<regname>" and so cannot express the constant.
def INT_PTX_SREG_WARPSIZE
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins),
                "mov.u32 \t$dst, WARP_SZ;",
                [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
64460b57cec5SDimitry Andric
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record whose geom, frag and ptx_elt_type are reused.
//   op - operation name; it is consulted only when choosing Predicates
//        (compared against "mma" and "ldmatrix" below).
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  // Only f32 and f64 use floating-point register classes; every other
  // element type listed here is carried in Int32Regs.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Int32Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  // One regclass entry per element of the inherited `regs` list.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // !cond yields the value of the first arm whose condition holds, so the
  // order of the arms below is significant.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],

    // f64 @ m8n8k4. This is the only arm needed for that combination: it
    // precedes the generic (non-f64) m8n8k4 arm further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],

    // m16n8k8 / m8n8k16, any op and element type
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],

    // non-f64 @ m8n8k4
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],

    // remaining mma-only geometries
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],

    // ldmatrix b16 @ m8n8
    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);

  // template DAGs for instruction inputs/output.
  // Both pair ptx_regs with reg_names; they differ only in the dag operator.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}
65550b57cec5SDimitry Andric
// Turns an instruction's (ins ...) argument dag into the dag that matches a
// call to intrinsic `Intr`. The result is exposed as `ret`.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Substitutions applied to Ins, innermost first:
  //   ins     -> Intr      (the dag operator becomes the intrinsic)
  //   MEMri   -> ADDRri
  //   MEMri64 -> ADDRri64
  //   imem    -> ADDRvar
  dag ret = !foreach(node, Ins,
              !subst(imem, ADDRvar,
                !subst(MEMri64, ADDRri64,
                  !subst(MEMri, ADDRri,
                    !subst(ins, Intr, node)))));
}
65650b57cec5SDimitry Andric
65660b57cec5SDimitry Andric// Same as above, but uses PatFrag instead of an Intrinsic.
//
// Parameters:
//   Intr - PatFrag that replaces the `ins` marker of the operand dag.
//   Ins  - instruction operand dag written as (ins ...).
// Result:
//   ret  - Ins with `ins` replaced by the PatFrag and imem / MEMri / MEMri64
//          replaced by ADDRvar / ADDRri / ADDRri64. The substitution chain is
//          identical to BuildPatternI; only the type of Intr differs.
65670b57cec5SDimitry Andricclass BuildPatternPF<PatFrag Intr, dag Ins> {
65680b57cec5SDimitry Andric  // Build a dag pattern that matches the intrinsic call.
65690b57cec5SDimitry Andric  dag ret = !foreach(tmp, Ins,
65700b57cec5SDimitry Andric                          !subst(imem, ADDRvar,
65710b57cec5SDimitry Andric                          !subst(MEMri64, ADDRri64,
65720b57cec5SDimitry Andric                          !subst(MEMri, ADDRri,
65730b57cec5SDimitry Andric                          !subst(ins, Intr, tmp)))));
65740b57cec5SDimitry Andric}
65750b57cec5SDimitry Andric
65760b57cec5SDimitry Andric// Common WMMA-related fields used for building patterns for all MMA instructions.
//
// Parameters:
//   _Intr - name of the intrinsic record; resolved with !cast<Intrinsic>.
//   _Args - list of (ins ...) dags that together form the input operands.
//
// The NVPTXInst base is instantiated with empty outs/ins, a "?" asm string and
// no pattern. The subclasses in this file fill in OutOperandList,
// InOperandList and AsmString with `let`, and the selection pattern is created
// separately by MMA_PAT from the IntrinsicPattern field.
65770b57cec5SDimitry Andricclass WMMA_INSTR<string _Intr, list<dag> _Args>
65780b57cec5SDimitry Andric  : NVPTXInst<(outs), (ins), "?", []> {
65790b57cec5SDimitry Andric  Intrinsic Intr = !cast<Intrinsic>(_Intr);
65800b57cec5SDimitry Andric  // Concatenate all arguments into a single dag.
65810b57cec5SDimitry Andric  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
65820b57cec5SDimitry Andric  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // Subclasses that must constrain the address space override this field
  // with a PatFrag-based pattern built by BuildPatternPF.
65830b57cec5SDimitry Andric  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
65840b57cec5SDimitry Andric}
65850b57cec5SDimitry Andric
65860b57cec5SDimitry Andric//
65870b57cec5SDimitry Andric// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
65880b57cec5SDimitry Andric//
65890b57cec5SDimitry Andric
// Instruction class for one wmma.load variant.
//
// Parameters:
//   Frag       - register info for the fragment being loaded; supplies the
//                output registers (Frag.Outs), predicates, and the frag, geom,
//                ptx_elt_type and regstring pieces of the asm string.
//   Layout     - layout name emitted in the asm string (instantiated with
//                "row" or "col" in the MMA_LDSTs defset).
//   Space      - address-space suffix: ".shared", ".global", or "" for
//                generic. Emitted verbatim in the asm string, so it must carry
//                its own leading dot.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed after the source address.
//   SrcOp      - operand class used for the $src address.
65900b57cec5SDimitry Andricclass WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
65910b57cec5SDimitry Andric                DAGOperand SrcOp>
65920b57cec5SDimitry Andric  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
65930b57cec5SDimitry Andric                              [!con((ins SrcOp:$src),
65940b57cec5SDimitry Andric                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
65950b57cec5SDimitry Andric    Requires<Frag.Predicates> {
65960b57cec5SDimitry Andric  // Load/store intrinsics are overloaded on pointer's address space.
65970b57cec5SDimitry Andric  // To match the right intrinsic, we need to build AS-constrained PatFrag.
65980b57cec5SDimitry Andric  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
65990b57cec5SDimitry Andric  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  // The same operand list, but headed by the intrinsic; this is the fragment
  // that the PatFrag below expands to.
6600e8d8bef9SDimitry Andric  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
66010b57cec5SDimitry Andric  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" selects the generic predicate.
66020b57cec5SDimitry Andric  PatFrag IntrFrag = PatFrag<PFOperands,
6603e8d8bef9SDimitry Andric                             PFOperandsIntr,
66040b57cec5SDimitry Andric                             !cond(!eq(Space, ".shared"): AS_match.shared,
66050b57cec5SDimitry Andric                                   !eq(Space, ".global"): AS_match.global,
6606e8d8bef9SDimitry Andric                                   true: AS_match.generic)>;
66070b57cec5SDimitry Andric  // Build AS-constrained pattern.
66080b57cec5SDimitry Andric  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
66090b57cec5SDimitry Andric
66100b57cec5SDimitry Andric  let OutOperandList = Frag.Outs;
  // The trailing MmaCode:$ptx operand is not part of the intrinsic; MMA_PAT
  // supplies it as `ptx.version`, and the asm string uses it via
  // "${ptx:aligned}".
  // NOTE(review): presumably the `aligned` modifier decides whether
  // ".aligned" is printed for the given PTX version -- confirm in the
  // MmaCode printer.
66110b57cec5SDimitry Andric  let InOperandList = !con(Args, (ins MmaCode:$ptx));
66120b57cec5SDimitry Andric  let AsmString = "wmma.load."
66130b57cec5SDimitry Andric                  # Frag.frag
66140b57cec5SDimitry Andric                  # ".sync"
66150b57cec5SDimitry Andric                  # "${ptx:aligned}"
66160b57cec5SDimitry Andric                  # "." # Layout
66170b57cec5SDimitry Andric                  # "." # Frag.geom
66180b57cec5SDimitry Andric                  # Space
66190b57cec5SDimitry Andric                  # "." # Frag.ptx_elt_type # " \t"
66200b57cec5SDimitry Andric                  # Frag.regstring
66210b57cec5SDimitry Andric                  # ", [$src]"
66220b57cec5SDimitry Andric                  # !if(WithStride, ", $ldm", "")
66230b57cec5SDimitry Andric                  # ";";
66240b57cec5SDimitry Andric}
66250b57cec5SDimitry Andric
66260b57cec5SDimitry Andric//
66270b57cec5SDimitry Andric// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
66280b57cec5SDimitry Andric//
// Instruction class for one wmma.store.d variant.
//
// Parameters:
//   Frag       - register info for the fragment being stored; supplies the
//                input registers (Frag.Ins), predicates, and the geom,
//                ptx_elt_type and regstring pieces of the asm string.
//   Layout     - layout name emitted in the asm string.
//   Space      - address-space suffix: ".shared", ".global", or "" for
//                generic. Emitted verbatim in the asm string.
//   WithStride - when set, an extra Int32Regs:$ldm operand is appended after
//                the fragment registers.
//   DstOp      - operand class used for the $dst address.
//
// Input operand order is: $dst, fragment registers, optional $ldm.
66290b57cec5SDimitry Andricclass WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
66300b57cec5SDimitry Andric                   bit WithStride, DAGOperand DstOp>
66310b57cec5SDimitry Andric  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
66320b57cec5SDimitry Andric               [!con((ins DstOp:$dst),
66330b57cec5SDimitry Andric                     Frag.Ins,
66340b57cec5SDimitry Andric                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
66350b57cec5SDimitry Andric    Requires<Frag.Predicates> {
66360b57cec5SDimitry Andric
66370b57cec5SDimitry Andric  // Load/store intrinsics are overloaded on pointer's address space.
66380b57cec5SDimitry Andric  // To match the right intrinsic, we need to build AS-constrained PatFrag.
66390b57cec5SDimitry Andric  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
  // One `node` entry is generated per fragment register (!size(Frag.regs)),
  // named after Frag.reg_names, so the shape mirrors Frag.Ins.
66400b57cec5SDimitry Andric  dag PFOperands = !con((ops node:$dst),
6641e8d8bef9SDimitry Andric                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
66420b57cec5SDimitry Andric                        !if(WithStride, (ops node:$ldm), (ops)));
66430b57cec5SDimitry Andric  // Build PatFrag that only matches particular address space.
  // The fragment is PFOperands with `ops` replaced by the intrinsic. Any Space
  // other than ".shared" or ".global" selects the generic predicate.
66440b57cec5SDimitry Andric  PatFrag IntrFrag = PatFrag<PFOperands,
66450b57cec5SDimitry Andric                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
66460b57cec5SDimitry Andric                             !cond(!eq(Space, ".shared"): AS_match.shared,
66470b57cec5SDimitry Andric                                   !eq(Space, ".global"): AS_match.global,
6648e8d8bef9SDimitry Andric                                   true: AS_match.generic)>;
66490b57cec5SDimitry Andric  // Build AS-constrained pattern.
66500b57cec5SDimitry Andric  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
66510b57cec5SDimitry Andric
  // The trailing MmaCode:$ptx operand is not part of the intrinsic; MMA_PAT
  // supplies it as `ptx.version`. The store produces no register results.
66520b57cec5SDimitry Andric  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
66530b57cec5SDimitry Andric  let OutOperandList = (outs);
66540b57cec5SDimitry Andric  let AsmString = "wmma.store.d.sync"
66550b57cec5SDimitry Andric                  # "${ptx:aligned}"
66560b57cec5SDimitry Andric                  # "." # Layout
66570b57cec5SDimitry Andric                  # "." # Frag.geom
66580b57cec5SDimitry Andric                  # Space
66590b57cec5SDimitry Andric                  # "." # Frag.ptx_elt_type
66600b57cec5SDimitry Andric                  # " \t[$dst],"
66610b57cec5SDimitry Andric                  # Frag.regstring
66620b57cec5SDimitry Andric                  # !if(WithStride, ", $ldm", "")
66630b57cec5SDimitry Andric                  # ";";
66640b57cec5SDimitry Andric}
66650b57cec5SDimitry Andric
66660b57cec5SDimitry Andric// Create all load/store variants
// Anonymous WMMA_LOAD / WMMA_STORE_D records are generated for every
// combination of layout, stride, address space, address operand class and
// fragment for which NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret holds. The
// defset collects them in MMA_LDSTs so that selection patterns can be built
// for all of them in one place (see the MMA_PAT foreach in this file).
// NOTE(review): unlike the WMMAs and MMAs defsets in this file, this one is
// not wrapped in `let isConvergent = true` -- confirm that this is intended.
66670b57cec5SDimitry Andricdefset list<WMMA_INSTR> MMA_LDSTs  = {
66680b57cec5SDimitry Andric  foreach layout = ["row", "col"] in {
6669e8d8bef9SDimitry Andric    foreach stride = [false, true] in {
66700b57cec5SDimitry Andric      foreach space = [".global", ".shared", ""] in {
66710b57cec5SDimitry Andric        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
66720b57cec5SDimitry Andric          foreach frag = NVVM_MMA_OPS.all_ld_ops in
6673fe6060f1SDimitry Andric            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6674fe6060f1SDimitry Andric              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
66750b57cec5SDimitry Andric          foreach frag = NVVM_MMA_OPS.all_st_ops in
6676fe6060f1SDimitry Andric            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
6677fe6060f1SDimitry Andric              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
66780b57cec5SDimitry Andric        } // addr
66790b57cec5SDimitry Andric      } // space
66800b57cec5SDimitry Andric    } // stride
66810b57cec5SDimitry Andric  } // layout
66820b57cec5SDimitry Andric} // defset
66830b57cec5SDimitry Andric
6684fe6060f1SDimitry Andric// B1 instruction variants need extra constraints.
//
// Parameters:
//   FragA - register info of the A fragment; its Predicates form the base list.
//   b1op  - b1 operation suffix of the instruction ("" when not applicable).
// Result:
//   ret   - FragA.Predicates, extended with [hasSM<80>, hasPTX<71>] when b1op
//           is exactly ".and.popc". Any other b1op value adds nothing.
6685fe6060f1SDimitry Andricclass MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // Op and Frag just record the template arguments on the class.
6686fe6060f1SDimitry Andric  string Op = b1op;
6687fe6060f1SDimitry Andric  WMMA_REGINFO Frag = FragA;
6688fe6060f1SDimitry Andric  list<Predicate> ret = !listconcat(
6689fe6060f1SDimitry Andric    FragA.Predicates,
669006c3fb27SDimitry Andric    !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
6691fe6060f1SDimitry Andric  );
6692fe6060f1SDimitry Andric}
66930b57cec5SDimitry Andric// WMMA.MMA
// Instruction class for one wmma.mma variant.
//
// Parameters:
//   FragA, FragB, FragC - register info for the input fragments; their Ins
//                         dags are concatenated (in that order) to form Args.
//   FragD               - register info for the result fragment (FragD.Outs).
//   ALayout, BLayout    - layout names emitted in the asm string.
//   Satfinite           - non-zero appends ".satfinite".
//   rnd                 - rounding mode; when non-empty it is emitted as
//                         "." # rnd right after the geometry.
//   b1op                - b1 operation suffix, emitted verbatim right after
//                         "wmma.mma" (so it carries its own leading dot).
66940b57cec5SDimitry Andricclass WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
66950b57cec5SDimitry Andric               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6696fe6060f1SDimitry Andric               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
6697fe6060f1SDimitry Andric  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
66980b57cec5SDimitry Andric                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
66990b57cec5SDimitry Andric    // Requires does not seem to have effect on Instruction w/o Patterns.
67000b57cec5SDimitry Andric    // We set it here anyways and propagate to the Pat<> we construct below.
6701fe6060f1SDimitry Andric    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
67020b57cec5SDimitry Andric  let OutOperandList = FragD.Outs;
  // The trailing MmaCode:$ptx operand is supplied by MMA_PAT as `ptx.version`.
67030b57cec5SDimitry Andric  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Type suffix of the instruction: when A is f16 only the D and C element
  // types are spelled out; otherwise all four are listed in D, A, B, C order.
67040b57cec5SDimitry Andric  string TypeList = !cond(
6705fe6060f1SDimitry Andric    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
6706fe6060f1SDimitry Andric                                     # "." # FragC.ptx_elt_type,
6707fe6060f1SDimitry Andric    1: "." # FragD.ptx_elt_type
67080b57cec5SDimitry Andric       # "." # FragA.ptx_elt_type
67090b57cec5SDimitry Andric       # "." # FragB.ptx_elt_type
6710fe6060f1SDimitry Andric       # "." # FragC.ptx_elt_type,
67110b57cec5SDimitry Andric  );
  // Registers are printed in D, A, B, C order, one fragment per line.
6712fe6060f1SDimitry Andric  let AsmString = "wmma.mma"
6713fe6060f1SDimitry Andric                  # b1op
67140b57cec5SDimitry Andric                  # ".sync"
67150b57cec5SDimitry Andric                  # "${ptx:aligned}"
67160b57cec5SDimitry Andric                  # "." # ALayout
67170b57cec5SDimitry Andric                  # "." # BLayout
67180b57cec5SDimitry Andric                  # "." # FragA.geom
6719fe6060f1SDimitry Andric                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
67200b57cec5SDimitry Andric                  # TypeList
67210b57cec5SDimitry Andric                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
67220b57cec5SDimitry Andric                  # FragD.regstring # ",\n\t\t"
67230b57cec5SDimitry Andric                  # FragA.regstring # ",\n\t\t"
67240b57cec5SDimitry Andric                  # FragB.regstring # ",\n\t\t"
6725fe6060f1SDimitry Andric                  # FragC.regstring # ";";
6726fe6060f1SDimitry Andric}
6727fe6060f1SDimitry Andric
// Create all wmma.mma variants.
// Anonymous WMMA_MMA records are generated for every combination of A/B
// layout, satfinite flag, rounding mode, op and b1 operation for which
// NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret holds. `op` is
// indexed as op[0..3] to obtain the A, B, C and D fragments. All records are
// marked isConvergent and collected in the WMMAs defset.
6728*0fca6ea1SDimitry Andriclet isConvergent = true in {
6729fe6060f1SDimitry Andricdefset list<WMMA_INSTR> WMMAs  = {
6730fe6060f1SDimitry Andric  foreach layout_a = ["row", "col"] in {
6731fe6060f1SDimitry Andric    foreach layout_b = ["row", "col"] in {
6732fe6060f1SDimitry Andric      foreach satf = [0, 1] in {
6733fe6060f1SDimitry Andric        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
6734fe6060f1SDimitry Andric          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
6735fe6060f1SDimitry Andric            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6736fe6060f1SDimitry Andric              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
6737fe6060f1SDimitry Andric                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
6738fe6060f1SDimitry Andric                              WMMA_REGINFO<op[1], "wmma.mma">,
6739fe6060f1SDimitry Andric                              WMMA_REGINFO<op[2], "wmma.mma">,
6740fe6060f1SDimitry Andric                              WMMA_REGINFO<op[3], "wmma.mma">,
6741fe6060f1SDimitry Andric                              layout_a, layout_b, satf, rnd, b1op>;
6742fe6060f1SDimitry Andric              }
6743fe6060f1SDimitry Andric            } // b1op
6744fe6060f1SDimitry Andric          } // op
6745fe6060f1SDimitry Andric        } // rnd
6746fe6060f1SDimitry Andric      } // satf
6747fe6060f1SDimitry Andric    } // layout_b
6748fe6060f1SDimitry Andric  } // layout_a
6749fe6060f1SDimitry Andric} // defset
6750*0fca6ea1SDimitry Andric}
6751fe6060f1SDimitry Andric
6752fe6060f1SDimitry Andric// MMA
// Instruction class for one mma.sync variant.
//
// Parameters:
//   FragA, FragB, FragC - register info for the input fragments; their Ins
//                         dags are concatenated (in that order) to form Args.
//   FragD               - register info for the result fragment (FragD.Outs).
//   ALayout, BLayout    - layout names emitted in the asm string.
//   Satfinite           - non-zero inserts ".satfinite" before the type list.
//   b1op                - b1 operation suffix, emitted verbatim after the type
//                         list (so it carries its own leading dot).
//
// Differences from WMMA_MMA visible here: ".aligned" is hard-coded in the
// mnemonic instead of using "${ptx:aligned}", there is no rounding-mode
// parameter, and all four element types are always listed.
6753fe6060f1SDimitry Andricclass MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
6754fe6060f1SDimitry Andric               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
6755fe6060f1SDimitry Andric               string ALayout, string BLayout, int Satfinite, string b1op>
6756fe6060f1SDimitry Andric  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
6757fe6060f1SDimitry Andric                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
6758fe6060f1SDimitry Andric    // Requires does not seem to have effect on Instruction w/o Patterns.
6759fe6060f1SDimitry Andric    // We set it here anyways and propagate to the Pat<> we construct below.
6760fe6060f1SDimitry Andric  Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
6761fe6060f1SDimitry Andric  let OutOperandList = FragD.Outs;
  // The trailing MmaCode:$ptx operand is supplied by MMA_PAT as `ptx.version`.
6762fe6060f1SDimitry Andric  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Element types in D, A, B, C order.
6763fe6060f1SDimitry Andric  string TypeList = "." # FragD.ptx_elt_type
6764fe6060f1SDimitry Andric                    # "." # FragA.ptx_elt_type
6765fe6060f1SDimitry Andric                    # "." # FragB.ptx_elt_type
6766fe6060f1SDimitry Andric                    # "." # FragC.ptx_elt_type;
  // Registers are printed in D, A, B, C order, one fragment per line.
6767fe6060f1SDimitry Andric  let AsmString = "mma.sync.aligned."
6768fe6060f1SDimitry Andric                  # FragA.geom
6769fe6060f1SDimitry Andric                  # "." # ALayout
6770fe6060f1SDimitry Andric                  # "." # BLayout
6771fe6060f1SDimitry Andric                  # !if(Satfinite, ".satfinite", "")
6772fe6060f1SDimitry Andric                  # TypeList
6773fe6060f1SDimitry Andric                  # b1op # "\n\t\t"
6774fe6060f1SDimitry Andric                  # FragD.regstring # ",\n\t\t"
6775fe6060f1SDimitry Andric                  # FragA.regstring # ",\n\t\t"
6776fe6060f1SDimitry Andric                  # FragB.regstring # ",\n\t\t"
6777fe6060f1SDimitry Andric                  # FragC.regstring # ";";
67780b57cec5SDimitry Andric}
67790b57cec5SDimitry Andric
// Create all mma.sync variants.
// Anonymous MMA records are generated for every combination of A/B layout,
// satfinite flag, op and b1 operation for which
// NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret holds. `op` is indexed
// as op[0..3] to obtain the A, B, C and D fragments. All records are marked
// isConvergent and collected in the MMAs defset.
6780*0fca6ea1SDimitry Andriclet isConvergent = true in {
67810b57cec5SDimitry Andricdefset list<WMMA_INSTR> MMAs  = {
67820b57cec5SDimitry Andric  foreach layout_a = ["row", "col"] in {
67830b57cec5SDimitry Andric    foreach layout_b = ["row", "col"] in {
67840b57cec5SDimitry Andric      foreach satf = [0, 1] in {
67850b57cec5SDimitry Andric        foreach op = NVVM_MMA_OPS.all_mma_ops in {
6786fe6060f1SDimitry Andric          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
6787e8d8bef9SDimitry Andric            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
6788fe6060f1SDimitry Andric              def : MMA<WMMA_REGINFO<op[0], "mma">,
6789fe6060f1SDimitry Andric                        WMMA_REGINFO<op[1], "mma">,
6790fe6060f1SDimitry Andric                        WMMA_REGINFO<op[2], "mma">,
6791fe6060f1SDimitry Andric                        WMMA_REGINFO<op[3], "mma">,
6792fe6060f1SDimitry Andric                        layout_a, layout_b, satf, b1op>;
67930b57cec5SDimitry Andric            }
6794fe6060f1SDimitry Andric          } // b1op
67950b57cec5SDimitry Andric        } // op
67960b57cec5SDimitry Andric      } // satf
67970b57cec5SDimitry Andric    } // layout_b
67980b57cec5SDimitry Andric  } // layout_a
67990b57cec5SDimitry Andric} // defset
6800*0fca6ea1SDimitry Andric}
68010b57cec5SDimitry Andric
6802349cc55cSDimitry Andric//
6803349cc55cSDimitry Andric// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
6804349cc55cSDimitry Andric//
// Note: the asm string built below also emits "." # Frag.frag between the
// geometry and the optional ".trans", which the summary line above omits.
//
// Parameters:
//   Frag       - register info for the loaded fragment; supplies the output
//                registers (Frag.Outs), predicates, and the geom, frag,
//                ptx_elt_type and regstring pieces of the asm string.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - address-space suffix: ".shared", or anything else for
//                generic. Emitted verbatim in the asm string.
//   SrcOp      - operand class used for the $src address.
6805349cc55cSDimitry Andricclass LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
6806349cc55cSDimitry Andric               DAGOperand SrcOp>
6807349cc55cSDimitry Andric  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
6808349cc55cSDimitry Andric    Requires<Frag.Predicates> {
6809349cc55cSDimitry Andric  // Build PatFrag that only matches particular address space.
6810349cc55cSDimitry Andric  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
6811349cc55cSDimitry Andric                             !cond(!eq(Space, ".shared"): AS_match.shared,
6812349cc55cSDimitry Andric                                   true: AS_match.generic)>;
6813349cc55cSDimitry Andric  // Build AS-constrained pattern.
6814349cc55cSDimitry Andric  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
6815349cc55cSDimitry Andric
6816349cc55cSDimitry Andric  let OutOperandList = Frag.Outs;
  // The trailing MmaCode:$ptx operand is supplied by MMA_PAT as `ptx.version`;
  // this asm string does not reference it.
6817349cc55cSDimitry Andric  let InOperandList = !con(Args, (ins MmaCode:$ptx));
6818349cc55cSDimitry Andric  let AsmString = "ldmatrix.sync.aligned."
6819349cc55cSDimitry Andric                  # Frag.geom
6820349cc55cSDimitry Andric                  # "." # Frag.frag
6821349cc55cSDimitry Andric                  # !if(Transposed, ".trans", "")
6822349cc55cSDimitry Andric                  # Space
6823349cc55cSDimitry Andric                  # "." # Frag.ptx_elt_type
6824349cc55cSDimitry Andric                  # " " # Frag.regstring # ", [$src];";
6825349cc55cSDimitry Andric}
6826349cc55cSDimitry Andric
6827349cc55cSDimitry Andric// Create all ldmatrix variants
// Anonymous LDMATRIX records are generated for every combination of
// transposed flag, address space, address operand class and fragment for
// which NVVM_LDMATRIX_SUPPORTED<frag>.ret holds, and collected in the
// LDMATRIXs defset.
// NOTE(review): unlike the WMMAs and MMAs defsets in this file, this one is
// not wrapped in `let isConvergent = true` -- confirm that this is intended.
6828349cc55cSDimitry Andricdefset list<WMMA_INSTR> LDMATRIXs  = {
6829349cc55cSDimitry Andric  foreach transposed = [false, true] in {
6830349cc55cSDimitry Andric    foreach space = [".shared", ""] in {
6831349cc55cSDimitry Andric      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
6832349cc55cSDimitry Andric        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
6833349cc55cSDimitry Andric          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
6834349cc55cSDimitry Andric            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
6835349cc55cSDimitry Andric                            addr>;
6836349cc55cSDimitry Andric      } // addr
6837349cc55cSDimitry Andric    } // space
6838349cc55cSDimitry Andric  } // transposed
6839349cc55cSDimitry Andric} // defset
68400b57cec5SDimitry Andric
68410b57cec5SDimitry Andric// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
68420b57cec5SDimitry Andric// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
68430b57cec5SDimitry Andric// the instruction record.
//
// Selection pattern for one WMMA_INSTR record `wi`:
//   source - wi.IntrinsicPattern (the intrinsic, or its AS-constrained PatFrag);
//   result - (wi <Args...>, ptx.version): wi.Args with `ins` replaced by the
//            instruction itself, plus the extra operand that fills the
//            MmaCode:$ptx slot appended to each InOperandList.
// The instruction's predicates are propagated to the pattern via Requires.
6844fe6060f1SDimitry Andricclass MMA_PAT<WMMA_INSTR wi>
68450b57cec5SDimitry Andric      : Pat<wi.IntrinsicPattern,
68460b57cec5SDimitry Andric            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
68470b57cec5SDimitry Andric                 (wi ptx.version))>,
68480b57cec5SDimitry Andric        Requires<wi.Predicates>;
68490b57cec5SDimitry Andric
68500b57cec5SDimitry Andric// Build intrinsic->instruction patterns for all MMA instructions.
// One anonymous MMA_PAT is emitted per record collected in the four defsets
// above (MMAs, WMMAs, MMA_LDSTs, LDMATRIXs).
6851349cc55cSDimitry Andricforeach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
6852fe6060f1SDimitry Andric  def : MMA_PAT<mma>;
685306c3fb27SDimitry Andric
// Instructions for the `mapa` family.
//
// Parameters:
//   suffix - text inserted right after "mapa" in the mnemonic (for example ""
//            or ".shared::cluster"); it must carry its own leading dot.
//   Intr   - intrinsic matched by every variant.
//
// Four variants are defined, differing in the width of $a/$d and in whether
// $b is a register or an immediate:
//   _32  - 32-bit $a/$d, $b in a register      (".u32")
//   _32i - 32-bit $a/$d, $b as i32 immediate   (".u32")
//   _64  - 64-bit $a/$d, $b in a register      (".u64")
//   _64i - 64-bit $a/$d, $b as i32 immediate   (".u64")
// $b is 32-bit in all variants. All variants require hasSM<90> and hasPTX<78>.
685406c3fb27SDimitry Andricmulticlass MAPA<string suffix, Intrinsic Intr> {
685506c3fb27SDimitry Andric  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
685606c3fb27SDimitry Andric              "mapa" # suffix # ".u32\t$d, $a, $b;",
685706c3fb27SDimitry Andric              [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
685806c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
685906c3fb27SDimitry Andric  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
686006c3fb27SDimitry Andric              "mapa" # suffix # ".u32\t$d, $a, $b;",
686106c3fb27SDimitry Andric              [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
686206c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
686306c3fb27SDimitry Andric  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
686406c3fb27SDimitry Andric              "mapa" # suffix # ".u64\t$d, $a, $b;",
686506c3fb27SDimitry Andric              [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
686606c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
686706c3fb27SDimitry Andric  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
686806c3fb27SDimitry Andric              "mapa" # suffix # ".u64\t$d, $a, $b;",
686906c3fb27SDimitry Andric              [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
687006c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
687106c3fb27SDimitry Andric}
687206c3fb27SDimitry Andric
// Instantiate MAPA for the unsuffixed form and for the ".shared::cluster"
// form, each bound to its own intrinsic.
687306c3fb27SDimitry Andricdefm mapa  : MAPA<"", int_nvvm_mapa>;
687406c3fb27SDimitry Andricdefm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
687506c3fb27SDimitry Andric
687606c3fb27SDimitry Andric
// Instructions for the `getctarank` family.
//
// Parameters:
//   suffix - text inserted right after "getctarank" in the mnemonic (for
//            example "" or ".shared::cluster"); it must carry its own leading
//            dot.
//   Intr   - intrinsic matched by both variants.
//
// Two variants are defined, differing in the width of the $a operand:
//   _32 - 32-bit $a (".u32")
//   _64 - 64-bit $a (".u64")
// The result $d is a 32-bit register in both variants. Both require hasSM<90>
// and hasPTX<78>.
687706c3fb27SDimitry Andricmulticlass GETCTARANK<string suffix, Intrinsic Intr> {
687806c3fb27SDimitry Andric  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
687906c3fb27SDimitry Andric              "getctarank" # suffix # ".u32\t$d, $a;",
688006c3fb27SDimitry Andric              [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
688106c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
688206c3fb27SDimitry Andric  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
688306c3fb27SDimitry Andric              "getctarank" # suffix # ".u64\t$d, $a;",
688406c3fb27SDimitry Andric              [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
688506c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
688606c3fb27SDimitry Andric}
688706c3fb27SDimitry Andric
// Instantiate GETCTARANK for the unsuffixed form and for the
// ".shared::cluster" form, each bound to its own intrinsic.
688806c3fb27SDimitry Andricdefm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
688906c3fb27SDimitry Andricdefm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
689006c3fb27SDimitry Andric
// Selects int_nvvm_is_explicit_cluster by moving the %is_explicit_cluster
// special register into a predicate (Int1Regs) result. Takes no inputs;
// requires hasSM<90> and hasPTX<78>.
689106c3fb27SDimitry Andricdef is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
689206c3fb27SDimitry Andric              "mov.pred\t$d, %is_explicit_cluster;",
689306c3fb27SDimitry Andric              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
689406c3fb27SDimitry Andric    Requires<[hasSM<90>, hasPTX<78>]>;
6895297eecfbSDimitry Andric
6896297eecfbSDimitry Andric// setmaxnreg inc/dec intrinsics
// Everything inside this `let` (the multiclass and both defm instantiations)
// is marked isConvergent.
6897297eecfbSDimitry Andriclet isConvergent = true in {
// Parameters:
//   Action - "inc" or "dec" in the instantiations below; inserted into the
//            mnemonic as "setmaxnreg.<Action>.sync.aligned.u32".
//   Intr   - intrinsic matched by the instruction.
// The register count is an immediate operand (i32imm) matched with `timm`.
// The instruction has no outputs and requires hasSM90a and hasPTX<80>.
6898297eecfbSDimitry Andricmulticlass SET_MAXNREG<string Action, Intrinsic Intr> {
6899297eecfbSDimitry Andric  def : NVPTXInst<(outs), (ins i32imm:$reg_count),
6900297eecfbSDimitry Andric          "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
6901297eecfbSDimitry Andric          [(Intr timm:$reg_count)]>,
6902297eecfbSDimitry Andric    Requires<[hasSM90a, hasPTX<80>]>;
6903297eecfbSDimitry Andric}
6904297eecfbSDimitry Andric
6905297eecfbSDimitry Andricdefm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
6906297eecfbSDimitry Andricdefm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
6907*0fca6ea1SDimitry Andric
6908297eecfbSDimitry Andric} // isConvergent
6909*0fca6ea1SDimitry Andric
// Selects int_nvvm_exit as the "exit;" instruction. No operands, no results,
// and no Requires<> predicates are attached.
6910*0fca6ea1SDimitry Andricdef INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;
6911