//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the node's APFloat
// value and compares it against the constant named in the def.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Code fragments that test the address space of a memory node through
// ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  // Recurses on n-1 and appends "<prefix><n-1>"; n == 0 yields the empty list.
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}

// Values the threadmask_imm flag iterates over in the shuffle foreach nest
// below: [0, 1] when sync is set, otherwise only [0].
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
                  [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
                  [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
                  [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// The reducing barriers take an i32 operand, turn it into a predicate with
// setp.ne, and (for and/or) turn the predicate result back into an i32 with
// selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
  [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
  [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
  [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// Suffix convention for the defs below: _I takes an immediate operand, _R a
// register operand.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

// Two-letter suffix: first letter describes $id, second describes $cnt
// (R = register, I = immediate).
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

// Operand-less cluster barrier. `variant` is appended to "barrier.cluster."
// to form the asm string; Preds defaults to [hasPTX<78>, hasSM<90>].
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
        NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
        Requires<Preds>;

def barrier_cluster_arrive:
        INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed:
        INT_BARRIER_CLUSTER<"arrive.relaxed",
        int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait:
        INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// 'aligned' versions of the cluster barrier intrinsics
def barrier_cluster_arrive_aligned:
        INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned:
        INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
        int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned:
        INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;

// One shfl / shfl.sync instruction. The operand lists, asm string and
// selection pattern are all computed from the template flags:
//   sync            - selects the "sync" intrinsic/asm form and adds a
//                     $threadmask operand.
//   mode, reg       - spliced into the intrinsic name; mode also goes into the
//                     asm string, reg picks the register class.
//   return_pred     - adds an Int1Regs:$pred result and a "p" name suffix.
//   *_imm           - choose i32imm instead of Int32Regs for that operand.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // The pattern is derived from the operand lists: outs -> set, ins -> Intr,
  // and every i32imm operand becomes an imm leaf.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of the SHFL_INSTR flags. threadmask_imm only
// takes both values when sync is set (see THREADMASK_INFO).
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode #" \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync with every immediate/register combination of $mask and
// $value. Note the suffixes as written: ii = imm mask + imm value,
// ir = reg mask + imm value, ri = imm mask + reg value, rr = reg mask + reg
// value.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync returning both $dest and a predicate $pred; same suffix
// scheme as MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
// Operand-less instruction whose asm string is StrOp and whose pattern is the
// bare intrinsic.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

def INT_FENCE_SC_CLUSTER:
       MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
       Requires<[hasPTX<78>, hasSM<90>]>;

//-----------------------------------
// Async Copy Functions
//-----------------------------------

// _32 / _64 select the register class used for the address operand.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

// cp.async.<cc>.shared.global with a fixed copy size `cpsize`. Intrin is the
// two-operand form; IntrinS is the form with an extra $src_size operand,
// which is provided both as a register (_32s/_64s) and as an immediate
// (_32si/_64si).
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
             "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ", $src_size;",
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
             "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ", $src_size;",
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
             "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ", $src_size;",
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
             "cp.async." # cc # ".shared.global [$dst], [$src], " # cpsize # ", $src_size;",
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
                                      int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
                                      int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
                                       int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
                                       int_nvvm_cp_async_cg_shared_global_16_s>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// cp.async.bulk variants of the commit/wait group
def CP_ASYNC_BULK_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;",
  [(int_nvvm_cp_async_bulk_commit_group)]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;",
  [(int_nvvm_cp_async_bulk_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

def CP_ASYNC_BULK_WAIT_GROUP_READ :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;",
  [(int_nvvm_cp_async_bulk_wait_group_read (i32 timm:$n))]>,
  Requires<[hasPTX<80>, hasSM<90>]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------
// Every multiclass below emits a _32 and a _64 record that differ only in the
// register class of $addr. AddrSpace is spliced into the mnemonic ("" or
// ".shared" in the instantiations that follow).

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive.noComplete" # AddrSpace #
                      ".b64 $state, [$addr], $count;",
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive.noComplete" # AddrSpace #
                      ".b64 $state, [$addr], $count;",
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           "mbarrier.arrive_drop" # AddrSpace #
                      ".b64 $state, [$addr];",
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           "mbarrier.arrive_drop" # AddrSpace #
                      ".b64 $state, [$addr];",
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive_drop.noComplete" # AddrSpace #
                      ".b64 $state, [$addr], $count;",
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive_drop.noComplete" # AddrSpace #
                      ".b64 $state, [$addr], $count;",
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;",
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;",
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// Emits the four selection patterns that rewrite FMin(One, FMax(Zero, x)) to
// Cvt with the CvtSAT modifier, one per ordering of the constant and variable
// operands of FMin and FMax. The pattern order is: constant-first FMin with
// constant-first FMax, constant-first FMin with constant-last FMax, then the
// two constant-last FMin forms.
multiclass SAT_MINMAX_PATS<Intrinsic FMin, Intrinsic FMax, PatLeaf One,
                           PatLeaf Zero, NVPTXRegClass RC, Instruction Cvt> {
  def : Pat<(FMin One, (FMax Zero, RC:$a)),
            (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin One, (FMax RC:$a, Zero)),
            (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin (FMax Zero, RC:$a), One),
            (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin (FMax RC:$a, Zero), One),
            (Cvt RC:$a, CvtSAT)>;
}

// f32 first, then f64, matching the original definition order.
defm : SAT_MINMAX_PATS<int_nvvm_fmin_f, int_nvvm_fmax_f, immFloat1, immFloat0,
                       Float32Regs, CVT_f32_f32>;
defm : SAT_MINMAX_PATS<int_nvvm_fmin_d, int_nvvm_fmax_d, immDouble1, immDouble0,
                       Float64Regs, CVT_f64_f64>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
//
// One-source-operand instruction: $dst in target_regclass is set to
// IntOP applied to $src0 in src_regclass. OpcStr is the complete asm string;
// Preds (empty by default) is forwarded to Requires.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
611class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass, 612 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP, 613 list<Predicate> Preds = []> 614 : NVPTXInst<(outs t_regclass:$dst), 615 (ins s0_regclass:$src0, s1_regclass:$src1), 616 OpcStr, 617 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>, 618 Requires<Preds>; 619 620class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, 621 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, 622 NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []> 623 : NVPTXInst<(outs t_regclass:$dst), 624 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), 625 OpcStr, 626 [(set t_regclass:$dst, 627 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>, 628 Requires<Preds>; 629 630// 631// MISC 632// 633 634def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, 635 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; 636 637// 638// Min Max 639// 640 641def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, 642 Float32Regs, Float32Regs, int_nvvm_fmin_f>; 643def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", 644 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; 645def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;", 646 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f, 647 [hasPTX<70>, hasSM<80>]>; 648def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;", 649 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f, 650 [hasPTX<70>, hasSM<80>]>; 651def INT_NVVM_FMIN_XORSIGN_ABS_F : 652 F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;", 653 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f, 654 [hasPTX<72>, hasSM<86>]>; 655def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F : 656 F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", 657 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f, 658 
[hasPTX<72>, hasSM<86>]>; 659def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F : 660 F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 661 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f, 662 [hasPTX<72>, hasSM<86>]>; 663def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F : 664 F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 665 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f, 666 [hasPTX<72>, hasSM<86>]>; 667 668def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, 669 Float32Regs, Float32Regs, int_nvvm_fmax_f>; 670def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", 671 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; 672def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;", 673 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f, 674 [hasPTX<70>, hasSM<80>]>; 675def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;", 676 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f, 677 [hasPTX<70>, hasSM<80>]>; 678def INT_NVVM_FMAX_XORSIGN_ABS_F : 679 F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;", 680 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f, 681 [hasPTX<72>, hasSM<86>]>; 682def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F : 683 F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", 684 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f, 685 [hasPTX<72>, hasSM<86>]>; 686def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F : 687 F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 688 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f, 689 [hasPTX<72>, hasSM<86>]>; 690def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F : 691 F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 692 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f, 693 [hasPTX<72>, hasSM<86>]>; 694 695def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, 
Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Min Max f16, f16x2, bf16, bf16x2
//

// One row of the min/max table: the name suffix of the variant, the intrinsic
// it selects, the register class used for the result and both sources, and
// the predicates guarding it (hasPTX<70>/hasSM<80> unless overridden).
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_2 record per table row. IntName is "min" or "max": it
// picks the fmin/fmax intrinsic of each row and is also the asm mnemonic.
// The asm suffix is the row's Variant with every "_" replaced by ".".
multiclass MIN_MAX<string IntName> {
  foreach P = [
    // f16
    MIN_MAX_TUPLE<"_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16, int_nvvm_fmax_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // f16x2
    MIN_MAX_TUPLE<"_f16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16
    MIN_MAX_TUPLE<"_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16,
          int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16x2
    MIN_MAX_TUPLE<"_bf16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16x2,
          int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>
  ] in {
    def P.Variant
        : F_MATH_2<!strconcat(IntName, !subst("_", ".", P.Variant),
                              " \t$dst, $src0, $src1;"),
                   P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}

defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX". It is kept as-is because
// the defm prefix becomes part of every generated record name; confirm nothing
// references INT_NVVM_FMAN_* before renaming.
defm INT_NVVM_FMAN : MIN_MAX<"max">;

//
// Multiplication
//

// mul.hi for signed/unsigned 16-, 32- and 64-bit integers.
def INT_NVVM_MULHI_S
    : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;",
               Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
def INT_NVVM_MULHI_US
    : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;",
               Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding-mode suffix (rn/rz/rm/rp), each with
// and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs,
Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply, one record per rounding-mode suffix.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on 32-bit registers, signed and unsigned.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 div with an explicit rounding-mode suffix, with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 div, one record per rounding-mode suffix.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

// Three-source "sad" records (F_MATH_3) for 16-, 32- and 64-bit integers.
def INT_NVVM_SAD_S
    : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
               Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
def INT_NVVM_SAD_US
    : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
               Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
  Int64Regs, Int64Regs, Int64Regs, Int64Regs,
int_nvvm_sad_ll>;
def INT_NVVM_SAD_ULL
    : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
               Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;

//
// Floor Ceil
//

// floor/ceil intrinsics are selected as same-type CVT instructions with the
// CvtRMI / CvtRPI mode operand (the _FTZ flavour for the ftz intrinsics).
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Abs, Neg bf16, bf16x2
//

// bf16 values use Int16Regs and bf16x2 values use Int32Regs; all four records
// are gated on hasPTX<70>/hasSM<80>.
def INT_NVVM_ABS_BF16
    : F_MATH_1<"abs.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_abs_bf16,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2
    : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_abs_bf16x2,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16
    : F_MATH_1<"neg.bf16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_neg_bf16,
               [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2
    : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_neg_bf16x2,
               [hasPTX<70>, hasSM<80>]>;

//
// Round
//

// round -> same-type CVT with the CvtRNI mode operand.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

// trunc -> same-type CVT with the CvtRZI mode operand.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

// saturate -> same-type CVT with the CvtSAT mode operand.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

// NOTE(review): the PTX ISA appears to document ex2.approx and lg2.approx for
// f32 (plus f16/f16x2 for ex2) but not for f64 -- confirm that the .f64 asm
// strings below are accepted by ptxas.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16
    : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
               Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16,
               [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2
    : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
               Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2,
               [hasPTX<70>, hasSM<75>]>;

def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def
INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// One row of the fma table: variant name suffix, intrinsic, the register
// class shared by the result and all three sources, and optional predicates
// (none by default).
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_3 record per table row. The asm mnemonic is "fma" followed
// by the row's Variant with every "_" replaced by ".".
multiclass FMA_INST {
  foreach P = [
    // f64: no predicates.
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32: no predicates.
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (Int16Regs): hasPTX<42>/hasSM<53>, relu forms hasPTX<70>/hasSM<80>.
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // bf16 (Int16Regs): all hasPTX<70>/hasSM<80>.
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // f16x2 (Int32Regs).
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,

    // bf16x2 (Int32Regs).
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>
  ] in {
    def P.Variant
        : F_MATH_3<!strconcat("fma", !subst("_", ".", P.Variant),
                              " \t$dst, $src0, $src1, $src2;"),
                   P.RegClass, P.RegClass, P.RegClass, P.RegClass,
                   P.Intr, P.Predicates>;
  }
}

defm INT_NVVM_FMA : FMA_INST;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs,
int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp, one record per rounding-mode suffix.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// rcp.approx.ftz for f32 and f64.
def INT_NVVM_RCP_APPROX_FTZ_F
    : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32 sqrt: each rounding-mode suffix with and without .ftz, then the approx
// forms.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt, one record per rounding-mode suffix.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f is mapped onto one of the records above. The
// four patterns are listed from the most constrained Requires<> list
// (doF32FTZ + do_SQRTF32_RN) down to the unconditional approx fallback.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add with an explicit rounding-mode suffix, with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one record per rounding-mode suffix.
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs,
Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// Every conversion intrinsic below is selected as a CVT_<dst>_<src>
// instruction whose last operand is the mode named in the intrinsic
// (rn/rz/rm/rp, optionally _FTZ). Float-to-integer patterns use the CvtR?I
// operands; the others use CvtR?.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Two f32 sources -> bf16x2
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// Two f32 sources -> f16x2
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def :
Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
    (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;

// f32 -> tf32: a dedicated instruction rather than a Pat onto a CVT_* record;
// the result is placed in an Int32Regs register.
def CVT_tf32_f32
    : NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
                "cvt.rna.tf32.f32 \t$dest, $a;",
                [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;

// Packs two 32-bit registers into one f64 register with a single mov.b64.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpacks one 32-bit half of an f64 register. The mov.b64 writes both halves;
// the unwanted half goes to a scratch register (%temp) declared inside the
// braces of the asm string.
def INT_NVVM_D2I_LO
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//

// Bit-pattern moves between integer and float registers of the same width.
def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Shared shape for every fns.b32 record: the result is always an Int32Regs
// $dst, the caller supplies the input operand list and the matching intrinsic
// DAG, and all records are gated on hasPTX<60>/hasSM<30>. The INT_FNS_xyz
// suffix spells the operand kinds of (mask, base, offset): r = register,
// i = immediate.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands )]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base,
Int32Regs:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<
    (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic (address + one value) for a single pointer width.
//   ptrT / ptrclass  - value type and register class of the address operand.
//   regT / regclass  - value type and register class of the data and result.
//   SpaceStr, OpcStr, TypeStr - pasted, in that order, after "atom" to form
//                      the mnemonic.
//   IntOp            - pattern fragment being selected.
//   IMMType / IMM    - operand type and SDNode used by the immediate form.
//   Pred             - predicates required by both forms.
// Produces `reg` ($b in a register) and `imm` ($b as an immediate).
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      "atom" # SpaceStr # OpcStr # TypeStr
                          # " \t$dst, [$addr], $b;",
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, IMMType:$b),
                      "atom" # SpaceStr # OpcStr # TypeStr
                          # " \t$dst, [$addr], $b;",
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
            Requires<Pred>;
}

// Instantiates F_ATOMIC_2_imp twice: `p32` with an i32/Int32Regs address and
// `p64` with an i64/Int64Regs address.  Pred defaults to no predicates.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}

// Two-operand atomic whose value operand is negated first.  The emitted block
// declares a scratch register `temp` of type .s<TypeStr>, writes neg.s<TypeStr>
// of $b into it, and then issues atom<SpaceStr><OpcStr>.u<TypeStr> on temp.
// Here TypeStr is the bare bit width (it is pasted after ".s" and ".u").
// Only a register form is defined.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
                      "{{ \n\t"
                          # ".reg \t.s" # TypeStr # " temp; \n\t"
                          # "neg.s" # TypeStr # " \ttemp, $b; \n\t"
                          # "atom" # SpaceStr # OpcStr # ".u" # TypeStr
                          # " \t$dst, [$addr], temp; \n\t"
                          # "}}",
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b)))]>,
            Requires<Pred>;
}

// 32-bit and 64-bit address variants of F_ATOMIC_2_NEG_imp.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                                SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}

// Three-operand atomic (address + $b + $c) for a single pointer width.
// Defines four forms covering every register/immediate combination of the two
// value operands:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediates are matched with the `imm` node directly.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b, regclass:$c),
                      "atom" # SpaceStr # OpcStr # TypeStr
                          # " \t$dst, [$addr], $b, $c;",
                      [(set (regT regclass:$dst),
                            (IntOp (ptrT ptrclass:$addr),
                                   (regT regclass:$b),
                                   (regT regclass:$c)))]>,
            Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                       "atom" # SpaceStr # OpcStr # TypeStr
                           # " \t$dst, [$addr], $b, $c;",
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    imm:$b,
                                    (regT regclass:$c)))]>,
             Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                       "atom" # SpaceStr # OpcStr # TypeStr
                           # " \t$dst, [$addr], $b, $c;",
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr),
                                    (regT regclass:$b),
                                    imm:$c))]>,
             Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                       "atom" # SpaceStr # OpcStr # TypeStr
                           # " \t$dst, [$addr], $b, $c;",
                       [(set (regT regclass:$dst),
                             (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
             Requires<Pred>;
}

// 32-bit and 64-bit address variants of F_ATOMIC_3_imp.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                            SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
}

// atom_add
//
// Address-space-qualified fragments: _g = global, _s = shared, _gen = generic.

def atomic_load_add_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;

// The fragments without a width in their name wrap atomic_load_fadd; they are
// used by the .f32 and .f64 instructions below.
def atomic_load_add_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;

// atom[.space].add.u32
// The *_USE_G variant matches the generic-space fragment but prints the
// ".global" form.
// NOTE(review): nothing visible here distinguishes *_GEN_* from *_GEN_*_USE_G
// at selection time (both pass the same fragment and no predicate) - confirm
// how the choice between them is meant to be made.
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".add", atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".add", atomic_load_add_32_gen, i32imm, imm>;

// atom[.space].add.u64
defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".add", atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".add", atomic_load_add_64_gen, i64imm, imm>;

// atom[.space].add.f32
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
    f32, Float32Regs, ".global", ".f32", ".add", atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
    f32, Float32Regs, ".shared", ".f32", ".add", atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
    f32, Float32Regs, "", ".f32", ".add", atomic_load_add_gen, f32imm, fpimm>;

// atom[.space].add.f64 (requires hasAtomAddF64)
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<f64,
Float64Regs, ".global", ".f64", ".add", atomic_load_add_g, f64imm, fpimm,
    [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
    f64, Float64Regs, ".shared", ".f64", ".add", atomic_load_add_s, f64imm, fpimm,
    [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
    f64, Float64Regs, "", ".f64", ".add", atomic_load_add_gen, f64imm, fpimm,
    [hasAtomAddF64]>;

// atom_sub
//
// Subtraction goes through F_ATOMIC_2_NEG with the ".add" opcode string, so
// each instruction negates its operand and then performs an atomic add.

def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, "", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".shared", "32", ".add", atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, ".shared", "64", ".add", atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, "", "64", ".add", atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G :
F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_64_gen>;

// atom_swap
//
// atomic_swap_{32,64} is emitted as atom[.space].exch.b{32,64}.

def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".b32", ".exch", atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".b64", ".exch", atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".exch", atomic_swap_64_gen, i64imm, imm>;

// atom_max
//
// atomic_load_max_* feed the .s32/.s64 instructions and atomic_load_umax_*
// feed the .u32/.u64 instructions.

def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<(ops
node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;

// atom[.space].max.s32
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".s32", ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
// atom[.space].max.s64 (the 64-bit forms require hasSM<32>)
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 :
F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".max", atomic_load_max_64_s,
           i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".max", atomic_load_max_64_gen, i64imm, imm,
    [hasSM<32>]>;
// atom[.space].max.u32
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
// atom[.space].max.u64 (requires hasSM<32>)
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".max", atomic_load_umax_64_s, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm,
    [hasSM<32>]>;

// atom_min
//
// atomic_load_min_* feed the .s32/.s64 instructions and atomic_load_umin_*
// feed the .u32/.u64 instructions.

def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a,
node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;

// atom[.space].min.s32
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".s32", ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
// atom[.space].min.s64 (requires hasSM<32>)
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".s64", ".min", atomic_load_min_64_s, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm
INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".min", atomic_load_min_64_gen, i64imm, imm,
    [hasSM<32>]>;
// atom[.space].min.u32
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
// atom[.space].min.u64 (requires hasSM<32>)
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".min", atomic_load_umin_64_s, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm,
    [hasSM<32>]>;

// atom_inc  atom_dec
//
// Unlike the fragments above, these wrap NVVM intrinsics
// (int_nvvm_atomic_load_{inc,dec}_32), and only 32-bit forms are defined.

def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen:
ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                   (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

// atom[.space].inc.u32
defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".inc", atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".inc", atomic_load_inc_32_gen, i32imm, imm>;
// atom[.space].dec.u32
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".dec", atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".dec", atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".dec", atomic_load_dec_32_gen, i32imm, imm>;

// atom_and
//
// Bitwise atomics use the untyped .b32/.b64 type strings.

def atomic_load_and_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
    atomic_load_and_32_g,
i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".b32", ".and", atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".b32", ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".and", atomic_load_and_32_gen, i32imm, imm>;
// atom[.space].and.b64 (requires hasSM<32>)
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".and", atomic_load_and_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".b64", ".and", atomic_load_and_64_s, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".b64", ".and", atomic_load_and_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".and", atomic_load_and_64_gen, i64imm, imm,
    [hasSM<32>]>;

// atom_or

def atomic_load_or_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;

// atom[.space].or.b32
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".b32", ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G :
F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_gen,
           i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".b32", ".or", atomic_load_or_32_s, i32imm, imm>;
// atom[.space].or.b64 (requires hasSM<32>)
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".or", atomic_load_or_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".b64", ".or", atomic_load_or_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".or", atomic_load_or_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".b64", ".or", atomic_load_or_64_s, i64imm, imm,
    [hasSM<32>]>;

// atom_xor

def atomic_load_xor_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;

// atom[.space].xor.b32
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".xor", atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".b32", ".xor", atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".b32", ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32",
    ".xor",
atomic_load_xor_32_gen, i32imm, imm>;
// atom[.space].xor.b64 (requires hasSM<32>)
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".xor", atomic_load_xor_64_g, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".b64", ".xor", atomic_load_xor_64_s, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".b64", ".xor", atomic_load_xor_64_gen, i64imm, imm,
    [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".xor", atomic_load_xor_64_gen, i64imm, imm,
    [hasSM<32>]>;

// atom_cas
//
// Compare-and-swap takes two value operands, so it goes through F_ATOMIC_3.

def atomic_cmp_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

// atom[.space].cas.b32
defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<
    i32, Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<
    i32, Int32Regs, ".shared", ".b32", ".cas", atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<
    i32, Int32Regs, "", ".b32", ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<
    i32, Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 :
F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas", atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<
    i64, Int64Regs, ".shared", ".b64", ".cas", atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<
    i64, Int64Regs, "", ".b64", ".cas", atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<
    i64, Int64Regs, ".global", ".b64", ".cas", atomic_cmp_swap_64_gen, i64imm>;

// Scoped atomic operations.
//
// The definitions below match intrinsics named
//   int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and turn each into the corresponding instruction.
// NOTE: not every combination is implemented:
//  - 'space' is restricted to generic, the only one needed to support CUDA.
//  - 'scope' = 'gpu' is the default and is handled by the regular atomic
//    instructions.

// Base instruction shared by the two- and three-operand scoped atomics.
//   AsmStr   - full assembly string; it must refer to the output as $result.
//   regT / regclass - value type and register class of the result.
//   Preds    - required predicates.
//   ins      - input operand dag.
//   Operands - pattern whose value is written to $result.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
    : NVPTXInst<(outs regclass:$result), ins, AsmStr,
                [(set (regT regclass:$result), Operands)]>,
      Requires<Preds>;

// Two-operand scoped atomic: one anonymous instruction per addressing mode,
// i.e. {32-bit, 64-bit address} x {register, immediate $b}.  The register
// forms get AddedComplexity = 1; the immediate forms keep the default.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
  }
  // TableGen cannot infer argument types from an Intrinsic (it can from an
  // Instruction), so immediates are given an explicit type by casting them
  // to ImmTy.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}

// Three-operand scoped atomic: every register/immediate permutation of $b and
// $c, each for 32-bit and 64-bit addresses.  AddedComplexity is 2 when both
// are registers, 1 when exactly one is an immediate, and the default when
// both are immediates.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b register, $c register.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src),
                            (regT regclass:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src),
                            (regT regclass:$b), (regT regclass:$c))>;
  }
  // Exactly one of $b / $c is an immediate.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src),
                            (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src),
                            (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i32 Int32Regs:$src),
                            (regT regclass:$b), (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i64 Int64Regs:$src),
                            (regT regclass:$b), (ImmTy Imm:$c))>;
  }
  // $b immediate, $c immediate.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Builds the assembly string and looks up the intrinsic by name for a
// two-operand scoped atomic, then defers to ATOM2P_impl.
//   asm:        atom[.<space>][.<scope>].<OpStr>.<TypeStr>
//               (the space is omitted for "gen", the scope for "gpu")
//   intrinsic:  int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
//               (the scope suffix is omitted when ScopeStr is empty)
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);
  defm : ATOM2P_impl<"atom" # SpaceSuffix # ScopeSuffix
                         # "." # OpStr # "." # TypeStr
                         # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(IntrName),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand counterpart of ATOM2N_impl: same naming scheme, but the asm
// string carries the extra $c operand and the work is passed to ATOM3P_impl.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defvar SpaceSuffix = !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr);
  defvar ScopeSuffix = !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr);
  defvar IntrName = "int_nvvm_atomic_" # OpStr
                    # "_" # SpaceStr # "_" # IntTypeStr
                    # !if(!empty(ScopeStr), "", "_" # ScopeStr);
  defm : ATOM3P_impl<"atom" # SpaceSuffix # ScopeSuffix
                         # "." # OpStr # "." # TypeStr
                         # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(IntrName),
                     regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Address-space fan-out.
// Only generic-space pointers need variants for now.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Two-operand atomics: only the "gen" space is instantiated.
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Three-operand atomics: only the "gen" space is instantiated.
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Only the "cta" and "sys" scopes are instantiated; each adds hasAtomScope to
// the caller-supplied predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // The .gpu scope is the default one and is already handled by the existing
  // atomics that do not spell out a scope, so it is not instantiated here.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);
  foreach Scope = ["cta", "sys"] in
    defm "_" # Scope : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, Scope,
                                   regT, regclass, ImmType, Imm, ImmTy,
                                   ScopedPreds>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // ".gpu"-scoped atomics are not defined: they behave like the regular,
  // non-scoped atomics defined elsewhere.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);
  foreach Scope = ["cta", "sys"] in
    defm "_" # Scope : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, Scope,
                                   regT, regclass, ImmType, Imm, ImmTy,
                                   ScopedPreds>;
}

// Per-operation type tables. Columns passed to ATOM2S_impl / ATOM3S_impl:
//   OpStr, intrinsic type letter, PTX type suffix, value type, register class,
//   immediate operand, immediate node, immediate value type, extra predicates.

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
                          []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs, f32imm, fpimm,
                          f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs, f64imm, fpimm,
                          f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
                          []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs, i32imm, imm, i32,
                          []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs, i64imm, imm, i64,
                          []>;
}

// Scoped atomic instantiations, one per PTX atom operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One scalar "ldu.global.<TyStr>" instruction per addressing form. TyStr
// carries the type suffix together with the operand list. The records have no
// selection pattern and all require hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar ResultOuts = (outs regclass:$result);

  def areg   : NVPTXInst<ResultOuts, (ins Int32Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<ResultOuts, (ins Int64Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<ResultOuts, (ins imemAny:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<ResultOuts, (ins MEMri:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<ResultOuts, (ins MEMri64:$src), LduAsm, []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldu: each vector element is a separate scalar result
// register ($dst1..$dstN). No selection patterns are attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar EltOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<EltOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<EltOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<EltOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<EltOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<EltOuts, (ins imemAny:$src), LduAsm, []>;
}

multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar EltOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<EltOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<EltOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<EltOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<EltOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<EltOuts, (ins imemAny:$src), LduAsm, []>;
}

// Two-element vector ldu.
defm INT_PTX_LDU_G_v2i8_ELE
    : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
    : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
    : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
    : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
    : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
    : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldu.
defm INT_PTX_LDU_G_v4i8_ELE
    : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
    : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
    : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
    : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
    : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
    : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// One scalar "ld.global.nc.<TyStr>" instruction per addressing form. TyStr
// carries the type suffix together with the operand list. The records have no
// selection pattern and all require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar ResultOuts = (outs regclass:$result);

  def areg   : NVPTXInst<ResultOuts, (ins Int32Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<ResultOuts, (ins Int64Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<ResultOuts, (ins imemAny:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<ResultOuts, (ins MEMri:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<ResultOuts, (ins MEMri64:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldg: each vector element is a separate scalar result
// register ($dst1..$dstN). One record per addressing form; no selection
// patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar EltOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<EltOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<EltOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<EltOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<EltOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<EltOuts, (ins imemAny:$src), LdgAsm, []>;
}

multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar EltOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<EltOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<EltOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<EltOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<EltOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<EltOuts, (ins imemAny:$src), LdgAsm, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Two-element vector ldg.
defm INT_PTX_LDG_G_v2i8_ELE
    : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
    : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
    : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
    : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
    : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
    : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element vector ldg.
defm INT_PTX_LDG_G_v4i8_ELE
    : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
    : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
    : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
    : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                    Float32Regs>;


// "cvta.<space>" address conversions selected from intrinsic Intrin.
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_6432 : 32-bit source widened with cvt.u64.u32 before a 64-bit cvta;
//               only available under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
      : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                  "cvta." # Str # ".u32 \t$result, $src;",
                  [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
      : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                  "cvta." # Str # ".u64 \t$result, $src;",
                  [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
      : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                  "{{ .reg .b64 %tmp;\n\t"
                  # " cvt.u64.u32 \t%tmp, $src;\n\t"
                  # " cvta." # Str # ".u64 \t$result, %tmp; }}",
                  [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
        Requires<[useShortPtr]>;
}

// "cvta.to.<space>" address conversions selected from intrinsic Intrin.
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_3264 : 64-bit cvta.to followed by cvt.u32.u64 to narrow the result;
//               only available under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
      : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                  "cvta.to." # Str # ".u32 \t$result, $src;",
                  [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
      : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                  "cvta.to." # Str # ".u64 \t$result, $src;",
                  [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
      : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                  "{{ .reg .b64 %tmp;\n\t"
                  # " cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                  # " cvt.u32.u64 \t$result, %tmp; }}",
                  [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
        Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param: emitted as a plain register move.
def nvvm_ptr_gen_to_param
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "mov.u32 \t$result, $src;",
                [(set Int32Regs:$result,
                      (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "mov.u64 \t$result, $src;",
                [(set Int64Regs:$result,
                      (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics: one "mov" per value type.
def nvvm_move_i16
    : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
                "mov.b16 \t$r, $s;",
                [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
    : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                "mov.b32 \t$r, $s;",
                [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
    : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                "mov.b64 \t$r, $s;",
                [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
    : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
                "mov.f32 \t$r, $s;",
                [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
    : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
                "mov.f64 \t$r, $s;",
                [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
    : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
                "mov.u32 \t$r, $s;",
                [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
    : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
                "mov.u64 \t$r, $s;",
                [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/


// Fold a local->generic->local round trip applied to a MoveParam symbol:
//
//   MoveParam        %r1, param
//   ptr_local_to_gen %r2, %r1
//   ptr_gen_to_local %r3, %r2
// ->
//   mov %r1, param

// @TODO: Revisit this. There is a type contradiction between iPTRAny and iPTR
// for the addr defs, so the move_sym instructions are not currently defined.
// However, we can use the ptr variants and the asm printer will do the right
// thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                   (int_nvvm_ptr_local_to_gen
                       (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                   (int_nvvm_ptr_local_to_gen
                       (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// 64-bit move of an imem operand; no selection pattern is attached.
def texsurf_handles
    : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
                "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each of these emits only a PTX comment line.
def INT_NVVM_COMPILER_WARN_32
    : NVPTXInst<(outs), (ins Int32Regs:$a),
                "// llvm.nvvm.compiler.warn()",
                [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
    : NVPTXInst<(outs), (ins Int64Regs:$a),
                "// llvm.nvvm.compiler.warn()",
                [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
    : NVPTXInst<(outs), (ins Int32Regs:$a),
                "// llvm.nvvm.compiler.error()",
                [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
    : NVPTXInst<(outs), (ins Int64Regs:$a),
                "// llvm.nvvm.compiler.error()",
                [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

// "isspacep.<suffix>" for a 32-bit (_32) and a 64-bit (_64) address operand,
// producing an Int1Regs result. Preds (empty by default) gates both records.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  defvar IsSpaceAsm = "isspacep." # suffix # "\t$d, $a;";

  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), IsSpaceAsm,
                      [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), IsSpaceAsm,
                      [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<Preds>;
}

defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
    : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
               [hasPTX<78>, hasSM<90>]>;

// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
                            "mov.b32 \t$d, $r;", []>;

// int_nvvm_read_ptx_sreg_envreg<N> reads ENVREG<N> through MOV_SPECIAL
// (N = 0..17 here).
foreach i = 0...17 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2563def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2564def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2565def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2566def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2567def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2568def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2569def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2570def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2571def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2572def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2573def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2574def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2575def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2576def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2577 2578 2579// rotate builtin support 2580 2581def ROTATE_B32_HW_IMM 2582 : NVPTXInst<(outs Int32Regs:$dst), 2583 (ins Int32Regs:$src, i32imm:$amt), 2584 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2585 [(set Int32Regs:$dst, 2586 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2587 Requires<[hasHWROT32]> ; 2588 2589def ROTATE_B32_HW_REG 2590 : NVPTXInst<(outs Int32Regs:$dst), 2591 (ins Int32Regs:$src, Int32Regs:$amt), 2592 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2593 [(set Int32Regs:$dst, 2594 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2595 Requires<[hasHWROT32]> ; 2596 2597def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2598 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2599 Requires<[noHWROT32]> ; 2600 2601def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2602 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2603 Requires<[noHWROT32]> ; 2604 2605let hasSideEffects = false 
in { 2606 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2607 !strconcat("{{\n\t", 2608 ".reg .b32 %dummy;\n\t", 2609 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2610 "}}"), 2611 []> ; 2612 2613 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2614 !strconcat("{{\n\t", 2615 ".reg .b32 %dummy;\n\t", 2616 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2617 "}}"), 2618 []> ; 2619} 2620 2621let hasSideEffects = false in { 2622 def PACK_TWO_INT32 2623 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2624 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2625} 2626 2627def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2628 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2629 (GET_LO_INT64 Int64Regs:$src))> ; 2630 2631// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2632// no side effects. 2633let hasSideEffects = false in { 2634 def SHF_L_WRAP_B32_IMM 2635 : NVPTXInst<(outs Int32Regs:$dst), 2636 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2637 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2638 Requires<[hasHWROT32]>; 2639 2640 def SHF_L_WRAP_B32_REG 2641 : NVPTXInst<(outs Int32Regs:$dst), 2642 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2643 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2644 Requires<[hasHWROT32]>; 2645 2646 def SHF_R_WRAP_B32_IMM 2647 : NVPTXInst<(outs Int32Regs:$dst), 2648 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2649 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2650 Requires<[hasHWROT32]>; 2651 2652 def SHF_R_WRAP_B32_REG 2653 : NVPTXInst<(outs Int32Regs:$dst), 2654 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2655 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2656 Requires<[hasHWROT32]>; 2657} 2658 2659// HW version of rotate 64 2660def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2661 (PACK_TWO_INT32 2662 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2663 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2664 (SHF_L_WRAP_B32_IMM 
(GET_LO_INT64 Int64Regs:$src), 2665 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2666 Requires<[hasHWROT32]>; 2667 2668def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2669 (PACK_TWO_INT32 2670 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2671 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2672 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2673 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2674 Requires<[hasHWROT32]>; 2675 2676 2677def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2678 (PACK_TWO_INT32 2679 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2680 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2681 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2682 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2683 Requires<[hasHWROT32]>; 2684 2685def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2686 (PACK_TWO_INT32 2687 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2688 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 2689 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2690 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2691 Requires<[hasHWROT32]>; 2692 2693// SW version of rotate 64 2694def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2695 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, 2696 Requires<[noHWROT32]>; 2697def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2698 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2699 Requires<[noHWROT32]>; 2700def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2701 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 2702 Requires<[noHWROT32]>; 2703def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2704 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2705 Requires<[noHWROT32]>; 2706 2707 2708//----------------------------------- 2709// Texture Intrinsics 2710//----------------------------------- 2711 2712// NOTE: For Fermi support, any new 
texture/surface/sampler intrinsics must be 2713// also defined in NVPTXReplaceImageHandles.cpp 2714 2715// texmode_independent 2716let IsTex = true, IsTexModeUnified = false in { 2717// Texture fetch instructions using handles 2718 2719class TEX_1D_base<string inst, NVPTXRegClass outtype, 2720 NVPTXRegClass intype, dag texsamp> 2721 : NVPTXInst<(outs outtype:$r, outtype:$g, 2722 outtype:$b, outtype:$a), 2723 !con(texsamp, (ins intype:$x)), 2724 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2725 []>; 2726 2727multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2728 def _RR : TEX_1D_base<inst, outtype, intype, 2729 (ins Int64Regs:$t, Int64Regs:$s)>; 2730 def _RI : TEX_1D_base<inst, outtype, intype, 2731 (ins Int64Regs:$t, i64imm:$s)>; 2732 def _IR : TEX_1D_base<inst, outtype, intype, 2733 (ins i64imm:$t, Int64Regs:$s)>; 2734 def _II : TEX_1D_base<inst, outtype, intype, 2735 (ins i64imm:$t, i64imm:$s)>; 2736} 2737 2738defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 2739defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2740defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 2741defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2742defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 2743defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2744 2745class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 2746 NVPTXRegClass intype, dag texsamp> 2747 : NVPTXInst<(outs outtype:$r, outtype:$g, 2748 outtype:$b, outtype:$a), 2749 !con(texsamp, (ins intype:$x, intype:$lod)), 2750 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", 2751 []>; 2752 2753multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, 2754 NVPTXRegClass intype> { 2755 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, 2756 (ins Int64Regs:$t, Int64Regs:$s)>; 2757 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, 2758 
(ins Int64Regs:$t, i64imm:$s)>; 2759 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, 2760 (ins i64imm:$t, Int64Regs:$s)>; 2761 def _II : TEX_1D_LEVEL_base<inst, outtype, intype, 2762 (ins i64imm:$t, i64imm:$s)>; 2763} 2764 2765defm TEX_1D_F32_F32_LEVEL : 2766 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2767defm TEX_1D_S32_F32_LEVEL : 2768 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2769defm TEX_1D_U32_F32_LEVEL : 2770 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2771 2772class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, 2773 NVPTXRegClass intype, dag texsamp> 2774 : NVPTXInst<(outs outtype:$r, outtype:$g, 2775 outtype:$b, outtype:$a), 2776 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), 2777 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," 2778 " \\{$gradx\\}, \\{$grady\\};", 2779 []>; 2780 2781multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, 2782 NVPTXRegClass intype> { 2783 def _RR : TEX_1D_GRAD_base<inst, outtype, intype, 2784 (ins Int64Regs:$t, Int64Regs:$s)>; 2785 def _RI : TEX_1D_GRAD_base<inst, outtype, intype, 2786 (ins Int64Regs:$t, i64imm:$s)>; 2787 def _IR : TEX_1D_GRAD_base<inst, outtype, intype, 2788 (ins i64imm:$t, Int64Regs:$s)>; 2789 def _II : TEX_1D_GRAD_base<inst, outtype, intype, 2790 (ins i64imm:$t, i64imm:$s)>; 2791} 2792 2793defm TEX_1D_F32_F32_GRAD 2794 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2795defm TEX_1D_S32_F32_GRAD 2796 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2797defm TEX_1D_U32_F32_GRAD 2798 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2799 2800class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 2801 NVPTXRegClass intype, dag texsamp> 2802 : NVPTXInst<(outs outtype:$r, outtype:$g, 2803 outtype:$b, outtype:$a), 2804 !con(texsamp, (ins Int32Regs:$l, intype:$x)), 2805 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", 2806 []>; 2807 2808multiclass 
TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, 2809 NVPTXRegClass intype> { 2810 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, 2811 (ins Int64Regs:$t, Int64Regs:$s)>; 2812 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, 2813 (ins Int64Regs:$t, i64imm:$s)>; 2814 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, 2815 (ins i64imm:$t, Int64Regs:$s)>; 2816 def _II : TEX_1D_ARRAY_base<inst, outtype, intype, 2817 (ins i64imm:$t, i64imm:$s)>; 2818} 2819 2820defm TEX_1D_ARRAY_F32_F32 2821 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2822defm TEX_1D_ARRAY_F32_S32 2823 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 2824defm TEX_1D_ARRAY_S32_S32 2825 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 2826defm TEX_1D_ARRAY_S32_F32 2827 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2828defm TEX_1D_ARRAY_U32_S32 2829 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 2830defm TEX_1D_ARRAY_U32_F32 2831 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2832 2833class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2834 NVPTXRegClass intype, dag texsamp> 2835 : NVPTXInst<(outs outtype:$r, outtype:$g, 2836 outtype:$b, outtype:$a), 2837 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), 2838 inst # " \t\\{$r, $g, $b, $a\\}," 2839 " [$t, $s, \\{$l, $x\\}], $lod;", 2840 []>; 2841 2842multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2843 NVPTXRegClass intype> { 2844 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2845 (ins Int64Regs:$t, Int64Regs:$s)>; 2846 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2847 (ins Int64Regs:$t, i64imm:$s)>; 2848 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2849 (ins i64imm:$t, Int64Regs:$s)>; 2850 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2851 (ins i64imm:$t, i64imm:$s)>; 2852} 2853 2854defm TEX_1D_ARRAY_F32_F32_LEVEL 2855 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, 
Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results ($r, $g, $b, $a). Inputs are the
// `texsamp` dag (supplies $t and $s) followed by $l (Int32Regs), $x, and one
// value each for $gradx and $grady, printed as two trailing brace groups.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x,
                                    intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
                      " \\{$gradx\\}, \\{$grady\\};",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by coordinates $x and $y.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_base<inst, outtype,
intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

// Instantiations of TEX_2D: asm mnemonic, result register class,
// coordinate register class.
defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $x, $y and $lod. $lod is printed as a
// trailing operand after the bracketed address.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y\\}], $lod;",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $l (Int32Regs), $x and $y.
// The asm string prints $y twice so the coordinate group has four elements
// (presumably padding required by the PTX operand form -- confirm against
// the PTX ISA).
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$l, $x, $y, $y\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32
  :
TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $l (Int32Regs), $x, $y and $lod.
// The asm string prints $y twice inside the coordinate group and $lod as a
// trailing operand.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$grady0, intype:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$l, $x, $y, $y\\}],"
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by coordinates $x, $y and $z.
// The asm string prints $z twice so the coordinate group has four elements
// (presumably padding required by the PTX operand form -- confirm against
// the PTX ISA).
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y, $z, $z\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs,
Int32Regs>;
defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $x, $y, $z and $lod.
// The asm string prints $z twice inside the coordinate group and $lod as a
// trailing operand.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $x, $y, $z and three values each for the
// x and y gradients. In the asm string the last element of the coordinate
// group and of each gradient group is printed twice.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$gradx2, intype:$grady0,
                                    intype:$grady1, intype:$grady2)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y, $z, $z\\}],"
                      " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                      " \\{$grady0, $grady1, $grady2, $grady2\\};",
                 []>;

multiclass TEX_3D_GRAD<string inst, NVPTXRegClass
outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by coordinates $x, $y and $z.
// The asm string prints $z twice so the coordinate group has four elements.
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y, $z, $z\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32
  : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32
  : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32
  : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x,
intype:$y, intype:$z,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $l (Int32Regs), $x, $y and $z.
// All four are printed once each in the coordinate group.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$l, $x, $y, $z\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs,
Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `texsamp` dag
// (supplies $t and $s) followed by $l (Int32Regs), $x, $y, $z and $lod.
// $lod is printed as a trailing operand after the bracketed address.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$z, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                         Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                         Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                         Int32Regs, Float32Regs>;

// Base record: four `outtype` results named $v0..$v3; inputs are the
// `texsamp` dag (supplies $t and $s) followed by coordinates $x and $y.
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
                      outtype:$v2, outtype:$v3),
                 !con(texsamp, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
                 []>;

// Four operand-kind variants. Suffix letters: first is $t, second is $s;
// R = Int64Regs operand, I = i64imm operand.
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, i64imm:$s)>;
  def _IR :
TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, i64imm:$s)>;
}

// tld4 instantiations: one defm per component selector (r/g/b/a in the
// mnemonic) and result type (f32/s32/u32); coordinates are Float32Regs.
defm TLD4_R_2D_F32_F32
  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32
  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32
  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32
  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

defm TLD4_R_2D_S32_F32
  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32
  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32
  : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32
  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

defm TLD4_R_2D_U32_F32
  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32
  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32
  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32
  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Closes a brace scope opened earlier in the file (the opening is not
// visible from this section).
}


// texmode_unified
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// Base record: four `outtype` results ($r, $g, $b, $a); inputs are the
// `tex` dag (supplies $t only -- no separate $s operand is printed)
// followed by coordinate $x.
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm
TEX_UNIFIED_1D_F32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $x and $lod. $lod is printed as a trailing operand.
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_F32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $x, $gradx and $grady. Each gradient is printed as its own
// single-element brace group.
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
                 []>;

multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_F32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $l (Int32Regs) and coordinate $x.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins
Int32Regs:$l, intype:$x, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $l (Int32Regs), $x, $gradx and $grady. Each gradient is
// printed as its own single-element brace group.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x,
                                intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $x, $y and $lod. $lod is printed as a trailing operand.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
                 []>;

// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
  :
TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $x, $y and two values each for the x and y gradients,
// printed as two trailing two-element brace groups.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y,
                                intype:$gradx0, intype:$gradx1,
                                intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;
// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $l (Int32Regs), $x and $y.
// The asm string prints $y twice so the coordinate group has four elements
// (presumably padding required by the PTX operand form -- confirm against
// the PTX ISA).
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
                 []>;
// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
: TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $l (Int32Regs), $x, $y and $lod. The asm string prints $y
// twice inside the coordinate group and $lod as a trailing operand.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                      " [$t, \\{$l, $x, $y, $y\\}], $lod;",
                 []>;
// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by $l (Int32Regs), $x, $y and two values each for the x and
// y gradients. The asm string prints $y twice inside the coordinate group.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                intype:$gradx0, intype:$gradx1,
                                intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
                      " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string
inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Base record: four `outtype` results; inputs are the `tex` dag (supplies
// $t) followed by coordinates $x, $y and $z.
// The asm string prints $z twice so the coordinate group has four elements
// (presumably padding required by the PTX operand form -- confirm against
// the PTX ISA).
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                 []>;
// Two operand-kind variants: _R takes $t as Int64Regs, _I as i64imm.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a), 3657 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3658 inst # " \t\\{$r, $g, $b, $a\\}," 3659 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3660 []>; 3661multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, 3662 NVPTXRegClass intype> { 3663 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3664 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3665} 3666 3667defm TEX_UNIFIED_3D_F32_F32_LEVEL 3668 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3669defm TEX_UNIFIED_3D_S32_F32_LEVEL 3670 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3671defm TEX_UNIFIED_3D_U32_F32_LEVEL 3672 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3673 3674class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, 3675 NVPTXRegClass intype, dag tex> 3676 : NVPTXInst<(outs outtype:$r, outtype:$g, 3677 outtype:$b, outtype:$a), 3678 !con(tex, (ins intype:$x, intype:$y, intype:$z, 3679 intype:$gradx0, intype:$gradx1, 3680 intype:$gradx2, intype:$grady0, 3681 intype:$grady1, intype:$grady2)), 3682 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 3683 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3684 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3685 []>; 3686multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, 3687 NVPTXRegClass intype> { 3688 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3689 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3690} 3691 3692defm TEX_UNIFIED_3D_F32_F32_GRAD 3693 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3694defm TEX_UNIFIED_3D_S32_F32_GRAD 3695 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3696defm TEX_UNIFIED_3D_U32_F32_GRAD 3697 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3698 3699class 
TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype, 3700 NVPTXRegClass intype, dag tex> 3701 : NVPTXInst<(outs outtype:$r, outtype:$g, 3702 outtype:$b, outtype:$a), 3703 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 3704 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 3705 []>; 3706multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype, 3707 NVPTXRegClass intype> { 3708 def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3709 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>; 3710} 3711 3712defm TEX_UNIFIED_CUBE_F32_F32 3713 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; 3714defm TEX_UNIFIED_CUBE_S32_F32 3715 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; 3716defm TEX_UNIFIED_CUBE_U32_F32 3717 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3718 3719class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 3720 NVPTXRegClass intype, dag tex> 3721 : NVPTXInst<(outs outtype:$r, outtype:$g, 3722 outtype:$b, outtype:$a), 3723 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3724 inst # " \t\\{$r, $g, $b, $a\\}," 3725 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3726 []>; 3727multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 3728 NVPTXRegClass intype> { 3729 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3730 (ins Int64Regs:$t)>; 3731 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3732 (ins i64imm:$t)>; 3733} 3734 3735defm TEX_UNIFIED_CUBE_F32_F32_LEVEL 3736 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", 3737 Float32Regs, Float32Regs>; 3738defm TEX_UNIFIED_CUBE_S32_F32_LEVEL 3739 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", 3740 Int32Regs, Float32Regs>; 3741defm TEX_UNIFIED_CUBE_U32_F32_LEVEL 3742 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", 3743 Int32Regs, Float32Regs>; 3744 3745class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass 
outtype, 3746 NVPTXRegClass intype, dag tex> 3747 : NVPTXInst<(outs outtype:$r, outtype:$g, 3748 outtype:$b, outtype:$a), 3749 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), 3750 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", 3751 []>; 3752multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, 3753 NVPTXRegClass intype> { 3754 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3755 (ins Int64Regs:$t)>; 3756 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3757 (ins i64imm:$t)>; 3758} 3759 3760defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 3761 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; 3762defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 3763 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; 3764defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 3765 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; 3766 3767class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3768 NVPTXRegClass intype, dag tex> 3769 : NVPTXInst<(outs outtype:$r, outtype:$g, 3770 outtype:$b, outtype:$a), 3771 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, 3772 intype:$lod)), 3773 inst # " \t\\{$r, $g, $b, $a\\}," 3774 " [$t, \\{$l, $x, $y, $z\\}], $lod;", 3775 []>; 3776multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3777 NVPTXRegClass intype> { 3778 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3779 (ins Int64Regs:$t)>; 3780 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3781 (ins i64imm:$t)>; 3782} 3783 3784defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3785 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 3786 Float32Regs, Float32Regs>; 3787defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3788 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 3789 Int32Regs, Float32Regs>; 3790defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3791 : 
TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 3792 Int32Regs, Float32Regs>; 3793 3794class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype, 3795 NVPTXRegClass intype, dag tex> 3796 : NVPTXInst<(outs outtype:$r, outtype:$g, 3797 outtype:$b, outtype:$a), 3798 !con(tex, (ins intype:$x, intype:$y, intype:$z, 3799 intype:$gradx0, intype:$gradx1, 3800 intype:$gradx2, intype:$grady0, 3801 intype:$grady1, intype:$grady2)), 3802 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 3803 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3804 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3805 []>; 3806 3807multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype, 3808 NVPTXRegClass intype> { 3809 def _R : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3810 def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3811} 3812 3813defm TEX_UNIFIED_CUBE_F32_F32_GRAD 3814 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs>; 3815defm TEX_UNIFIED_CUBE_S32_F32_GRAD 3816 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs>; 3817defm TEX_UNIFIED_CUBE_U32_F32_GRAD 3818 : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3819 3820class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3821 NVPTXRegClass intype, dag tex> 3822 : NVPTXInst<(outs outtype:$r, outtype:$g, 3823 outtype:$b, outtype:$a), 3824 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, 3825 intype:$gradx0, intype:$gradx1, 3826 intype:$gradx2, intype:$grady0, 3827 intype:$grady1, intype:$grady2)), 3828 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}]," 3829 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3830 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3831 []>; 3832multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3833 NVPTXRegClass intype> { 3834 def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, 
intype, 3835 (ins Int64Regs:$t)>; 3836 def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype, 3837 (ins i64imm:$t)>; 3838} 3839 3840defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD 3841 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32", 3842 Float32Regs, Float32Regs>; 3843defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD 3844 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32", 3845 Int32Regs, Float32Regs>; 3846defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD 3847 : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32", 3848 Int32Regs, Float32Regs>; 3849 3850class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 3851 NVPTXRegClass intype, dag tex> 3852 : NVPTXInst<(outs outtype:$v0, outtype:$v1, 3853 outtype:$v2, outtype:$v3), 3854 !con(tex, (ins intype:$x, intype:$y)), 3855 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", 3856 []>; 3857multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3858 NVPTXRegClass intype> { 3859 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3860 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3861} 3862 3863defm TLD4_UNIFIED_R_2D_F32_F32 3864 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3865defm TLD4_UNIFIED_G_2D_F32_F32 3866 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3867defm TLD4_UNIFIED_B_2D_F32_F32 3868 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3869defm TLD4_UNIFIED_A_2D_F32_F32 3870 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3871 3872defm TLD4_UNIFIED_R_2D_S32_F32 3873 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3874defm TLD4_UNIFIED_G_2D_S32_F32 3875 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3876defm TLD4_UNIFIED_B_2D_S32_F32 3877 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3878defm TLD4_UNIFIED_A_2D_S32_F32 3879 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3880 
defm TLD4_UNIFIED_R_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32 :
  TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Closes a `let ... in {` scope that was opened before this span.
}



//=== Surface load instructions

// Single-result surface loads: every record in this scope gets IsSuld = true.
let IsSuld = true in {

// suld.b.1d: one output $r of class `outtype`. Inputs are the handle dag
// `surf` (which must name its operand $s, as used in the asm string) followed
// by one Int32Regs coordinate $x. No selection pattern is attached ([]).
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: the b8 variants use Int16Regs for the result, like the b16 ones.
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// suld.b.a1d: as SULD_1D_base, with an extra Int32Regs operand $l printed
// ahead of $x in the coordinate vector.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x\\}];"),
                []>;
// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: throughout this span the b8 variants use Int16Regs for the result.
defm SULD_1D_ARRAY_I8_CLAMP  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// suld.b.2d: one output $r; handle $s from `surf`; coordinates $x, $y.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// suld.b.a2d: operands $l, $x, $y; the asm string repeats $y so that the
// coordinate vector has four elements.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_I8_CLAMP  : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_I8_TRAP  : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_I8_ZERO  : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// suld.b.3d: operands $x, $y, $z; the asm string repeats $z so that the
// coordinate vector has four elements.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(inst, " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
// Closes a `let ... in {` scope that was opened before this span.
}

// Two-result (.v2) surface loads: every record in this scope gets IsSuld = 2.
let IsSuld = 2 in {

// suld.b.1d.v2: two outputs ($r, $g); handle $s from `surf`; coordinate $x.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V2I8_CLAMP  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO  : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// suld.b.a1d.v2: as SULD_1D_V2_base, with $l printed ahead of $x.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO :
  SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

// suld.b.2d.v2: two outputs ($r, $g); handle $s from the dag `surf` (which
// must name its operand $s); coordinates $x, $y. No selection pattern ([]).
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: throughout this span the b8 variants use Int16Regs for the data.
defm SULD_2D_V2I8_CLAMP  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;

// suld.b.a2d.v2: operands $l, $x, $y; $y is repeated in the printed
// coordinate vector so that it has four elements.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO :
  SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// suld.b.3d.v2: operands $x, $y, $z; $z is repeated in the printed
// coordinate vector so that it has four elements.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

// Closes a `let ... in {` scope that was opened before this span.
}

// Four-result (.v4) surface loads: every record in this scope gets
// IsSuld = 3. Only b8/b16/b32 variants are instantiated here (no b64).
let IsSuld = 3 in {

// suld.b.1d.v4: four outputs ($r, $g, $b, $a); handle $s; coordinate $x.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// suld.b.a1d.v4: as SULD_1D_V4_base, with $l printed ahead of $x.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO :
  SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// suld.b.2d.v4: four outputs; handle $s; coordinates $x, $y.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO  : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// suld.b.a2d.v4: operands $l, $x, $y; $y is repeated in the printed
// coordinate vector so that it has four elements.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                !strconcat(inst,
                           " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO :
  SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// suld.b.3d.v4: operands $x, $y, $z; $z is repeated in the printed
// coordinate vector so that it has four elements.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                !strconcat(inst,
                           " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V4I8_CLAMP  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

}

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// txq.* instructions: an Int32Regs result $d and a 64-bit handle $a, given
// either in a register (_R) or as an immediate (_I). None carries an inline
// selection pattern; the _R forms are selected by the Pat records below.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_ORDER_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.width.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.height.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.height.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
}

// Map each int_nvvm_txq_* intrinsic on a register handle to its _R form.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// suq.* instructions: same operand shape as the txq.* records above
// (Int32Regs result $d, 64-bit handle $a as register _R or immediate _I).
let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_ORDER_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.width.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.width.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.height.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.height.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_R :
  NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
            "suq.array_size.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_I :
  NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
            "suq.array_size.b32 \t$d, [$a];", []>;
}

// Map each int_nvvm_suq_* intrinsic on a register handle to its _R form.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
          (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Each istypep.* record yields an Int1Regs result $d from a register handle
// $a and carries its selection pattern inline. Unlike the query instructions
// above, the handle is printed bare ($a), not bracketed ([$a]).
def ISTYPEP_SAMPLER :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.samplerref \t$d, $a;",
            [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.surfref \t$d, $a;",
            [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE :
  NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
            "istypep.texref \t$d, $a;",
            [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;

//===- Surface Stores -----------------------------------------------------===//

// Surface stores: every record in this scope gets IsSust = true. The scope
// is not closed within this span.
let IsSust = true in {

// sust.*.1d: no outputs. Inputs are the handle dag `surf` (which must name
// its operand $s), one Int32Regs coordinate $x, and the value $r to store.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r\\};"),
                []>;

// _R takes the handle $s in an Int64Regs register, _I as an i64imm immediate.
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
// The name and body of the next defm follow on the next line of the file.
defm
SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; 4523defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; 4524defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; 4525 4526defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; 4527defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; 4528defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; 4529defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; 4530 4531defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; 4532defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; 4533defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; 4534defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; 4535 4536defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; 4537defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; 4538defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; 4539 4540class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4541 : NVPTXInst<(outs), 4542 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), 4543 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", 4544 []>; 4545multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { 4546 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4547 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; 4548} 4549 4550defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; 4551defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; 4552defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; 4553defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; 4554 4555defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; 4556defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; 4557defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", 
Int32Regs>; 4558defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; 4559 4560defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; 4561defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; 4562defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; 4563defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; 4564 4565defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; 4566defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; 4567defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; 4568 4569class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4570 : NVPTXInst<(outs), 4571 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, 4572 intype:$b, intype:$a)), 4573 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4574 []>; 4575multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { 4576 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4577 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; 4578} 4579 4580defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; 4581defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; 4582defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 4583 4584defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 4585defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 4586defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 4587 4588defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 4589defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 4590defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 4591 4592defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 4593defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", 
Int16Regs>; 4594defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 4595 4596class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4597 : NVPTXInst<(outs), 4598 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 4599 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4600 []>; 4601multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 4602 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4603 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4604} 4605 4606defm SUST_B_1D_ARRAY_B8_CLAMP 4607 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; 4608defm SUST_B_1D_ARRAY_B16_CLAMP 4609 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; 4610defm SUST_B_1D_ARRAY_B32_CLAMP 4611 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; 4612defm SUST_B_1D_ARRAY_B64_CLAMP 4613 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; 4614 4615defm SUST_B_1D_ARRAY_B8_TRAP 4616 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; 4617defm SUST_B_1D_ARRAY_B16_TRAP 4618 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; 4619defm SUST_B_1D_ARRAY_B32_TRAP 4620 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; 4621defm SUST_B_1D_ARRAY_B64_TRAP 4622 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; 4623 4624defm SUST_B_1D_ARRAY_B8_ZERO 4625 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; 4626defm SUST_B_1D_ARRAY_B16_ZERO 4627 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; 4628defm SUST_B_1D_ARRAY_B32_ZERO 4629 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; 4630defm SUST_B_1D_ARRAY_B64_ZERO 4631 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; 4632 4633defm SUST_P_1D_ARRAY_B8_TRAP 4634 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; 4635defm SUST_P_1D_ARRAY_B16_TRAP 4636 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; 4637defm SUST_P_1D_ARRAY_B32_TRAP 4638 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; 4639 4640class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 4641 : 
NVPTXInst<(outs), 4642 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 4643 intype:$r, intype:$g)), 4644 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4645 []>; 4646multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { 4647 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 4648 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 4649} 4650 4651defm SUST_B_1D_ARRAY_V2B8_CLAMP 4652 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; 4653defm SUST_B_1D_ARRAY_V2B16_CLAMP 4654 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; 4655defm SUST_B_1D_ARRAY_V2B32_CLAMP 4656 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; 4657defm SUST_B_1D_ARRAY_V2B64_CLAMP 4658 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; 4659 4660defm SUST_B_1D_ARRAY_V2B8_TRAP 4661 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; 4662defm SUST_B_1D_ARRAY_V2B16_TRAP 4663 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; 4664defm SUST_B_1D_ARRAY_V2B32_TRAP 4665 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; 4666defm SUST_B_1D_ARRAY_V2B64_TRAP 4667 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; 4668 4669defm SUST_B_1D_ARRAY_V2B8_ZERO 4670 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; 4671defm SUST_B_1D_ARRAY_V2B16_ZERO 4672 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; 4673defm SUST_B_1D_ARRAY_V2B32_ZERO 4674 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; 4675defm SUST_B_1D_ARRAY_V2B64_ZERO 4676 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; 4677 4678defm SUST_P_1D_ARRAY_V2B8_TRAP 4679 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; 4680defm SUST_P_1D_ARRAY_V2B16_TRAP 4681 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; 4682defm SUST_P_1D_ARRAY_V2B32_TRAP 4683 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; 4684 4685class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 4686 : NVPTXInst<(outs), 4687 !con(surf, (ins 
Int32Regs:$idx, Int32Regs:$x, 4688 intype:$r, intype:$g, intype:$b, intype:$a)), 4689 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", 4690 []>; 4691multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> { 4692 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 4693 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 4694} 4695 4696defm SUST_B_1D_ARRAY_V4B8_CLAMP 4697 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; 4698defm SUST_B_1D_ARRAY_V4B16_CLAMP 4699 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; 4700defm SUST_B_1D_ARRAY_V4B32_CLAMP 4701 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; 4702 4703defm SUST_B_1D_ARRAY_V4B8_TRAP 4704 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; 4705defm SUST_B_1D_ARRAY_V4B16_TRAP 4706 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; 4707defm SUST_B_1D_ARRAY_V4B32_TRAP 4708 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; 4709 4710defm SUST_B_1D_ARRAY_V4B8_ZERO 4711 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; 4712defm SUST_B_1D_ARRAY_V4B16_ZERO 4713 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; 4714defm SUST_B_1D_ARRAY_V4B32_ZERO 4715 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; 4716 4717defm SUST_P_1D_ARRAY_V4B8_TRAP 4718 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; 4719defm SUST_P_1D_ARRAY_V4B16_TRAP 4720 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; 4721defm SUST_P_1D_ARRAY_V4B32_TRAP 4722 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; 4723 4724class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf> 4725 : NVPTXInst<(outs), 4726 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), 4727 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", 4728 []>; 4729multiclass SUST_2D<string inst, NVPTXRegClass intype> { 4730 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>; 4731 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>; 4732} 4733 4734defm SUST_B_2D_B8_CLAMP 
: SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; 4735defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; 4736defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; 4737defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; 4738 4739defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; 4740defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; 4741defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; 4742defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; 4743 4744defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; 4745defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; 4746defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; 4747defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; 4748 4749defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; 4750defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; 4751defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; 4752 4753class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4754 : NVPTXInst<(outs), 4755 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 4756 intype:$r, intype:$g)), 4757 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4758 []>; 4759multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> { 4760 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4761 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>; 4762} 4763 4764defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; 4765defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; 4766defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; 4767defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; 4768 4769defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; 4770defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; 
// Remaining two-channel 2D store instantiations (.trap for 32/64-bit, then
// .zero, then the sust.p forms).
defm SUST_B_2D_V2B32_TRAP
    : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP
    : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_V2B8_ZERO
    : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_V2B16_ZERO
    : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO
    : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO
    : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_V2B8_TRAP
    : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP
    : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP
    : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;

// 2D store, four channels ($r, $g, $b, $a) addressed by ($x, $y). `surf`
// provides the handle operand $s that the asm string refers to.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$x, Int32Regs:$y,
                    intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V4B8_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP
    : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_V4B8_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_V4B16_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP
    : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_V4B8_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_V4B16_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO
    : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_V4B8_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP
    : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;

// 2D array store, one channel. Operands are the index $idx followed by
// ($x, $y); the printed coordinate list has four entries, with $y repeated in
// the final slot.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, intype:$r)),
          !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_B8_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_B8_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;

// 2D array store, two channels; same coordinate layout as the one-channel
// form.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                    intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V2B8_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;

// 2D array store, four channels. Operands are the index $idx followed by
// ($x, $y); the printed coordinate list has four entries, with $y repeated in
// the final slot. `surf` provides the handle operand $s.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                    intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst,
                     " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V4B8_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;

// 3D store, one channel, addressed by ($x, $y, $z). The printed coordinate
// list has four entries, with $z repeated in the final slot.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, intype:$r)),
          !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_B8_CLAMP
    : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP
    : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP
    : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP
    : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP
    : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
defm SUST_B_3D_B16_TRAP
    : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP
    : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP
    : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO
    : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO
    : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO
    : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO
    : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP
    : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP
    : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP
    : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;

// 3D store, two channels.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                    intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V2B8_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP
    : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP
    : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO
    : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP
    : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;

// 3D store, four channels.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf,
               (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                    intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst,
                     " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
          []>;
// Register-handle (_R) and immediate-handle (_I) variants.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V4B8_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP
    : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP
    : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO
    : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP
    : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;

// End of the `let IsSust = true in` block.
}

// Surface store instruction patterns.
// They are kept apart from the instruction definitions because attaching them
// inline makes TableGen complain of type errors (root cause not understood).
// Every pattern selects the register-handle (_R) instruction and forwards the
// operands in the same order.

// .clamp variant -- 1D
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .clamp variant -- 1D array ($l is bound to the index operand)
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .clamp variant -- 2D
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .clamp variant -- 2D array ($l is bound to the index operand)
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
              Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5249 Int32Regs:$g), 5250 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5251 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5252 5253def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 5254 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5255 Int64Regs:$g), 5256 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5257 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5258 5259def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 5260 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5261 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5262 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, 5263 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5264 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5265 5266def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 5267 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5268 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5269 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, 5270 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5271 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5272 5273def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 5274 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5275 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5276 (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5277 Int32Regs:$x, Int32Regs:$y, 5278 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5279 5280 5281 5282def : Pat<(int_nvvm_sust_b_3d_i8_clamp 5283 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5284 Int16Regs:$r), 5285 (SUST_B_3D_B8_CLAMP_R Int64Regs:$s, 5286 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5287 Int16Regs:$r)>; 5288 5289def : Pat<(int_nvvm_sust_b_3d_i16_clamp 5290 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5291 Int16Regs:$r), 5292 (SUST_B_3D_B16_CLAMP_R Int64Regs:$s, 5293 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5294 Int16Regs:$r)>; 5295 
// ---- int_nvvm_sust_b_* selection patterns: rest of 3d _clamp, all of _trap,
// and _zero up to 3d i32 ----
// Each record is an anonymous `def : Pat<(intrinsic ops), (INST ops)>` that
// maps int_nvvm_sust_b_<geom>_<type>_<mode> onto SUST_B_<GEOM>_<TYPE>_<MODE>_R
// with the same operands in the same order (<mode> is clamp, trap or zero).
// Operand classes as written below: $s is Int64Regs; $l/$x/$y/$z are
// Int32Regs; the data operands $r/$g/$b/$a are Int16Regs for both the i8 and
// i16 forms, Int32Regs for i32 and Int64Regs for i64.
// NOTE(review): presumably $s is the surface handle, $l the array layer and
// $x/$y/$z the coordinates of a PTX `sust.b` store, with clamp/trap/zero being
// its out-of-bounds modes -- confirm against the SUST_B_* instruction
// definitions, which are not visible in this chunk.
// NOTE(review): the bare integers between tokens (5296, 5297, ...) look like
// line numbers fused into the text by an extraction step, and the two `//`
// section markers below share a physical line with pattern text -- confirm
// the line breaks against the checked-in file.
// _clamp: 3d i32..v4i8, head of 3d v4i16.
5296def : Pat<(int_nvvm_sust_b_3d_i32_clamp 5297 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5298 Int32Regs:$r), 5299 (SUST_B_3D_B32_CLAMP_R Int64Regs:$s, 5300 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5301 Int32Regs:$r)>; 5302 5303def : Pat<(int_nvvm_sust_b_3d_i64_clamp 5304 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5305 Int64Regs:$r), 5306 (SUST_B_3D_B64_CLAMP_R Int64Regs:$s, 5307 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5308 Int64Regs:$r)>; 5309 5310def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp 5311 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5312 Int16Regs:$r, Int16Regs:$g), 5313 (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s, 5314 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5315 Int16Regs:$r, Int16Regs:$g)>; 5316 5317def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp 5318 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5319 Int16Regs:$r, Int16Regs:$g), 5320 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, 5321 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5322 Int16Regs:$r, Int16Regs:$g)>; 5323 5324def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 5325 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5326 Int32Regs:$r, Int32Regs:$g), 5327 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, 5328 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5329 Int32Regs:$r, Int32Regs:$g)>; 5330 5331def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 5332 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5333 Int64Regs:$r, Int64Regs:$g), 5334 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, 5335 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5336 Int64Regs:$r, Int64Regs:$g)>; 5337 5338def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 5339 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5340 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5341 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, 5342 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5343 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5344 5345def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 5346 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 
// _clamp: tail of 3d v4i16, 3d v4i32 (last _clamp record). Then the _trap
// section opens: 1d i8..v2i32, head of 1d v2i64.
5347 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5348 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, 5349 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5350 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5351 5352def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 5353 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5354 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5355 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, 5356 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5357 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5358 5359 5360// .trap variant 5361def : Pat<(int_nvvm_sust_b_1d_i8_trap 5362 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5363 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5364 5365def : Pat<(int_nvvm_sust_b_1d_i16_trap 5366 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5367 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5368 5369def : Pat<(int_nvvm_sust_b_1d_i32_trap 5370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5371 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5372 5373def : Pat<(int_nvvm_sust_b_1d_i64_trap 5374 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5375 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 5376 5377def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 5378 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5379 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5380 Int16Regs:$r, Int16Regs:$g)>; 5381 5382def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 5383 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5384 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5385 Int16Regs:$r, Int16Regs:$g)>; 5386 5387def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 5388 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5389 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5390 Int32Regs:$r, Int32Regs:$g)>; 5391 5392def : Pat<(int_nvvm_sust_b_1d_v2i64_trap 5393 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5394 (SUST_B_1D_V2B64_TRAP_R 
// _trap: tail of 1d v2i64, 1d v4i8/v4i16/v4i32, 1d_array i8..v2i8, then a
// dangling `def : ` whose body (1d_array v2i16) is on the next line.
Int64Regs:$s, Int32Regs:$x, 5395 Int64Regs:$r, Int64Regs:$g)>; 5396 5397def : Pat<(int_nvvm_sust_b_1d_v4i8_trap 5398 Int64Regs:$s, Int32Regs:$x, 5399 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5400 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5401 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5402 5403def : Pat<(int_nvvm_sust_b_1d_v4i16_trap 5404 Int64Regs:$s, Int32Regs:$x, 5405 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5406 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5407 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5408 5409def : Pat<(int_nvvm_sust_b_1d_v4i32_trap 5410 Int64Regs:$s, Int32Regs:$x, 5411 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5412 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5413 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5414 5415 5416 5417def : Pat<(int_nvvm_sust_b_1d_array_i8_trap 5418 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5419 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5420 Int16Regs:$r)>; 5421 5422def : Pat<(int_nvvm_sust_b_1d_array_i16_trap 5423 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5424 (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5425 Int16Regs:$r)>; 5426 5427def : Pat<(int_nvvm_sust_b_1d_array_i32_trap 5428 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5429 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5430 Int32Regs:$r)>; 5431 5432def : Pat<(int_nvvm_sust_b_1d_array_i64_trap 5433 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5434 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5435 Int64Regs:$r)>; 5436 5437def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap 5438 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5439 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5440 Int16Regs:$r, Int16Regs:$g)>; 5441 5442def : 
// _trap: 1d_array v2i16..v4i32, 2d i8, head of 2d i16.
Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 5443 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5444 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5445 Int16Regs:$r, Int16Regs:$g)>; 5446 5447def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 5448 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5449 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5450 Int32Regs:$r, Int32Regs:$g)>; 5451 5452def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 5453 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5454 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5455 Int64Regs:$r, Int64Regs:$g)>; 5456 5457def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 5458 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5459 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5460 (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5461 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5462 5463def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 5464 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5465 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5466 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5467 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5468 5469def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 5470 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5471 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5472 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5473 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5474 5475 5476 5477def : Pat<(int_nvvm_sust_b_2d_i8_trap 5478 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5479 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5480 Int16Regs:$r)>; 5481 5482def : Pat<(int_nvvm_sust_b_2d_i16_trap 5483 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5484 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, 
// _trap: tail of 2d i16, 2d i32..v4i16, then a dangling `def : ` whose body
// (2d v4i32) is on the next line.
Int32Regs:$x, Int32Regs:$y, 5485 Int16Regs:$r)>; 5486 5487def : Pat<(int_nvvm_sust_b_2d_i32_trap 5488 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5489 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5490 Int32Regs:$r)>; 5491 5492def : Pat<(int_nvvm_sust_b_2d_i64_trap 5493 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5494 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5495 Int64Regs:$r)>; 5496 5497def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 5498 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5499 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5500 Int16Regs:$r, Int16Regs:$g)>; 5501 5502def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 5503 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5504 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5505 Int16Regs:$r, Int16Regs:$g)>; 5506 5507def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 5508 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5509 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5510 Int32Regs:$r, Int32Regs:$g)>; 5511 5512def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 5513 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 5514 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5515 Int64Regs:$r, Int64Regs:$g)>; 5516 5517def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 5518 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5519 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5520 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5521 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5522 5523def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 5524 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5525 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5526 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5527 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5528 5529def : 
// _trap: 2d v4i32, 2d_array i8..v2i16, head of 2d_array v2i32.
Pat<(int_nvvm_sust_b_2d_v4i32_trap 5530 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5531 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5532 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5533 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5534 5535 5536 5537def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 5538 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5539 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 5540 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5541 Int16Regs:$r)>; 5542 5543def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 5544 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5545 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 5546 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5547 Int16Regs:$r)>; 5548 5549def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 5550 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5551 (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 5552 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5553 Int32Regs:$r)>; 5554 5555def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 5556 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5557 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, 5558 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5559 Int64Regs:$r)>; 5560 5561def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 5562 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5563 Int16Regs:$r, Int16Regs:$g), 5564 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 5565 Int32Regs:$x, Int32Regs:$y, 5566 Int16Regs:$r, Int16Regs:$g)>; 5567 5568def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 5569 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5570 Int16Regs:$r, Int16Regs:$g), 5571 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 5572 Int32Regs:$x, Int32Regs:$y, 5573 Int16Regs:$r, Int16Regs:$g)>; 5574 5575def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 5576 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5577 Int32Regs:$g), 5578 
// _trap: tail of 2d_array v2i32, 2d_array v2i64..v4i32, 3d i8, 3d i16,
// head of 3d i32.
(SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5579 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5580 5581def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 5582 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5583 Int64Regs:$g), 5584 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, 5585 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5586 5587def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 5588 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5589 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5590 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 5591 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5592 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5593 5594def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 5595 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5596 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5597 (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 5598 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5599 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5600 5601def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 5602 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5603 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5604 (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5605 Int32Regs:$x, Int32Regs:$y, 5606 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5607 5608 5609 5610def : Pat<(int_nvvm_sust_b_3d_i8_trap 5611 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5612 Int16Regs:$r), 5613 (SUST_B_3D_B8_TRAP_R Int64Regs:$s, 5614 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5615 Int16Regs:$r)>; 5616 5617def : Pat<(int_nvvm_sust_b_3d_i16_trap 5618 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5619 Int16Regs:$r), 5620 (SUST_B_3D_B16_TRAP_R Int64Regs:$s, 5621 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5622 Int16Regs:$r)>; 5623 5624def : Pat<(int_nvvm_sust_b_3d_i32_trap 5625 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
// _trap: tail of 3d i32, 3d i64..v4i8, head of 3d v4i16.
Int32Regs:$z, 5626 Int32Regs:$r), 5627 (SUST_B_3D_B32_TRAP_R Int64Regs:$s, 5628 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5629 Int32Regs:$r)>; 5630 5631def : Pat<(int_nvvm_sust_b_3d_i64_trap 5632 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5633 Int64Regs:$r), 5634 (SUST_B_3D_B64_TRAP_R Int64Regs:$s, 5635 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5636 Int64Regs:$r)>; 5637 5638def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 5639 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5640 Int16Regs:$r, Int16Regs:$g), 5641 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, 5642 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5643 Int16Regs:$r, Int16Regs:$g)>; 5644 5645def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 5646 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5647 Int16Regs:$r, Int16Regs:$g), 5648 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, 5649 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5650 Int16Regs:$r, Int16Regs:$g)>; 5651 5652def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 5653 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5654 Int32Regs:$r, Int32Regs:$g), 5655 (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s, 5656 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5657 Int32Regs:$r, Int32Regs:$g)>; 5658 5659def : Pat<(int_nvvm_sust_b_3d_v2i64_trap 5660 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5661 Int64Regs:$r, Int64Regs:$g), 5662 (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s, 5663 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5664 Int64Regs:$r, Int64Regs:$g)>; 5665 5666def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 5667 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5668 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5669 (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s, 5670 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5671 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5672 5673def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 5674 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5675 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5676 (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s, 
// _trap: tail of 3d v4i16, 3d v4i32 (last _trap record). Then the _zero
// section opens: 1d i8..v2i64, head of 1d v4i8.
5677 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5678 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5679 5680def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 5681 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5682 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5683 (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s, 5684 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5685 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5686 5687 5688// .zero variant 5689def : Pat<(int_nvvm_sust_b_1d_i8_zero 5690 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5691 (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5692 5693def : Pat<(int_nvvm_sust_b_1d_i16_zero 5694 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5695 (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5696 5697def : Pat<(int_nvvm_sust_b_1d_i32_zero 5698 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5699 (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5700 5701def : Pat<(int_nvvm_sust_b_1d_i64_zero 5702 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5703 (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 5704 5705def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 5706 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5707 (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, 5708 Int16Regs:$r, Int16Regs:$g)>; 5709 5710def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 5711 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5712 (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5713 Int16Regs:$r, Int16Regs:$g)>; 5714 5715def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 5716 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5717 (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5718 Int32Regs:$r, Int32Regs:$g)>; 5719 5720def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 5721 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5722 (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, 5723 Int64Regs:$r, Int64Regs:$g)>; 5724 5725def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 5726 
// _zero: tail of 1d v4i8, 1d v4i16/v4i32, 1d_array i8..v2i8, head of
// 1d_array v2i16.
Int64Regs:$s, Int32Regs:$x, 5727 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5728 (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, 5729 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5730 5731def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 5732 Int64Regs:$s, Int32Regs:$x, 5733 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5734 (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5735 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5736 5737def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 5738 Int64Regs:$s, Int32Regs:$x, 5739 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5740 (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5741 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5742 5743 5744 5745def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 5746 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5747 (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5748 Int16Regs:$r)>; 5749 5750def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 5751 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5752 (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5753 Int16Regs:$r)>; 5754 5755def : Pat<(int_nvvm_sust_b_1d_array_i32_zero 5756 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5757 (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5758 Int32Regs:$r)>; 5759 5760def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 5761 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5762 (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5763 Int64Regs:$r)>; 5764 5765def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 5766 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5767 (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5768 Int16Regs:$r, Int16Regs:$g)>; 5769 5770def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 5771 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5772 
// _zero: tail of 1d_array v2i16, 1d_array v2i32..v4i32, 2d i8, 2d i16,
// head of 2d i32.
(SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5773 Int16Regs:$r, Int16Regs:$g)>; 5774 5775def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 5776 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5777 (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5778 Int32Regs:$r, Int32Regs:$g)>; 5779 5780def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 5781 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5782 (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5783 Int64Regs:$r, Int64Regs:$g)>; 5784 5785def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 5786 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5787 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5788 (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5789 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5790 5791def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 5792 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5793 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5794 (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5795 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5796 5797def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 5798 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5799 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5800 (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5801 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5802 5803 5804 5805def : Pat<(int_nvvm_sust_b_2d_i8_zero 5806 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5807 (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5808 Int16Regs:$r)>; 5809 5810def : Pat<(int_nvvm_sust_b_2d_i16_zero 5811 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5812 (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5813 Int16Regs:$r)>; 5814 5815def : Pat<(int_nvvm_sust_b_2d_i32_zero 5816 Int64Regs:$s, Int32Regs:$x, 
// _zero: tail of 2d i32, 2d i64..v4i16, head of 2d v4i32.
Int32Regs:$y, Int32Regs:$r), 5817 (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5818 Int32Regs:$r)>; 5819 5820def : Pat<(int_nvvm_sust_b_2d_i64_zero 5821 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5822 (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5823 Int64Regs:$r)>; 5824 5825def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 5826 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5827 (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5828 Int16Regs:$r, Int16Regs:$g)>; 5829 5830def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 5831 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5832 (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5833 Int16Regs:$r, Int16Regs:$g)>; 5834 5835def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 5836 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5837 (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5838 Int32Regs:$r, Int32Regs:$g)>; 5839 5840def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 5841 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 5842 (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5843 Int64Regs:$r, Int64Regs:$g)>; 5844 5845def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 5846 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5847 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5848 (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5849 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5850 5851def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 5852 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5853 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5854 (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5855 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5856 5857def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 5858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5859 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5860 
// _zero: tail of 2d v4i32, 2d_array i8..v2i32, then a dangling `def : ` whose
// body (2d_array v2i64) is on the next line.
(SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5861 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5862 5863 5864 5865def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 5866 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5867 (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s, 5868 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5869 Int16Regs:$r)>; 5870 5871def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 5872 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5873 (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s, 5874 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5875 Int16Regs:$r)>; 5876 5877def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 5878 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5879 (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s, 5880 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5881 Int32Regs:$r)>; 5882 5883def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 5884 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5885 (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s, 5886 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5887 Int64Regs:$r)>; 5888 5889def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 5890 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5891 Int16Regs:$r, Int16Regs:$g), 5892 (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, 5893 Int32Regs:$x, Int32Regs:$y, 5894 Int16Regs:$r, Int16Regs:$g)>; 5895 5896def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 5897 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5898 Int16Regs:$r, Int16Regs:$g), 5899 (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, 5900 Int32Regs:$x, Int32Regs:$y, 5901 Int16Regs:$r, Int16Regs:$g)>; 5902 5903def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 5904 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5905 Int32Regs:$g), 5906 (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5907 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5908 5909def : 
// _zero: 2d_array v2i64..v4i32, 3d i8, 3d i16, 3d i32 (this line ends on a
// complete record).
Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 5910 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5911 Int64Regs:$g), 5912 (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, 5913 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5914 5915def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 5916 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5918 (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s, 5919 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5920 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5921 5922def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 5923 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5924 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5925 (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s, 5926 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5927 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5928 5929def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 5930 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5931 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5932 (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5933 Int32Regs:$x, Int32Regs:$y, 5934 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5935 5936 5937 5938def : Pat<(int_nvvm_sust_b_3d_i8_zero 5939 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5940 Int16Regs:$r), 5941 (SUST_B_3D_B8_ZERO_R Int64Regs:$s, 5942 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5943 Int16Regs:$r)>; 5944 5945def : Pat<(int_nvvm_sust_b_3d_i16_zero 5946 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5947 Int16Regs:$r), 5948 (SUST_B_3D_B16_ZERO_R Int64Regs:$s, 5949 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5950 Int16Regs:$r)>; 5951 5952def : Pat<(int_nvvm_sust_b_3d_i32_zero 5953 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5954 Int32Regs:$r), 5955 (SUST_B_3D_B32_ZERO_R Int64Regs:$s, 5956 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5957 Int32Regs:$r)>; 

// int_nvvm_sust_b_3d_*_zero  ->  SUST_B_3D_*_ZERO_R (64-bit and vector forms)
//
// Every surface-store pattern below selects the instruction named in its
// result dag for a call to the intrinsic named in its source dag, forwarding
// the operands unchanged and in the same order.

def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// int_nvvm_sust_p_1d_*_trap  ->  SUST_P_1D_*_TRAP_R

def : Pat<(int_nvvm_sust_p_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_1d_array_*_trap  ->  SUST_P_1D_ARRAY_*_TRAP_R

def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_2d_*_trap  ->  SUST_P_2D_*_TRAP_R

def : Pat<(int_nvvm_sust_p_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_P_2D_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_2d_array_*_trap  ->  SUST_P_2D_ARRAY_*_TRAP_R

def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_ARRAY_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_ARRAY_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_P_2D_ARRAY_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_2D_ARRAY_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_2D_ARRAY_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_2D_ARRAY_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_2D_ARRAY_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_3d_*_trap  ->  SUST_P_3D_*_TRAP_R

def : Pat<(int_nvvm_sust_p_3d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

//-----------------------------------
// Read Special Registers
//-----------------------------------

// Reads the special register "%<regname>" into a 64-bit register with
// mov.u64 and selects that instruction for the zero-operand intrinsic `intop`.
// `Preds` (empty by default) are attached through Requires<>.
class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat("mov.u64 \t$d, %", regname, ";"),
              [(set Int64Regs:$d, (intop))]>,
    Requires<Preds>;

// 32-bit counterpart of PTX_READ_SREG_R64: mov.u32 into an Int32Regs result.
class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat("mov.u32 \t$d, %", regname, ";"),
              [(set Int32Regs:$d, (intop))]>,
    Requires<Preds>;

// Defines the four component readers _x, _y, _z and _w of a special register.
// For each suffix, the register is "<regname>.<suffix>" and the intrinsic is
// looked up by name as int_nvvm_read_ptx_sreg_<regname>_<suffix>.
multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
  foreach suffix = ["x", "y", "z", "w"] in {
    defvar reg = regname # "." # suffix;
    defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
    def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
  }
}

// TODO Add read vector-version of special registers

defm INT_PTX_SREG_TID   : PTX_READ_SREG_R32V4<"tid">;
defm INT_PTX_SREG_NTID  : PTX_READ_SREG_R32V4<"ntid">;
defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;

// Cluster-related registers are gated on hasSM<90> and hasPTX<78>.
defm INT_PTX_SREG_CLUSTERID :
       PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_NCLUSTERID :
       PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_CTAID :
       PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_SREG_CLUSTER_NCTAID:
       PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;

def  INT_PTX_SREG_CLUSTER_CTARANK :
       PTX_READ_SREG_R32<"cluster_ctarank",
                         int_nvvm_read_ptx_sreg_cluster_ctarank,
                         [hasSM<90>, hasPTX<78>]>;
def  INT_PTX_SREG_CLUSTER_NCTARANK:
       PTX_READ_SREG_R32<"cluster_nctarank",
                         int_nvvm_read_ptx_sreg_cluster_nctarank,
                         [hasSM<90>, hasPTX<78>]>;


def INT_PTX_SREG_LANEID :
    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID :
    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
def INT_PTX_SREG_SMID :
    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID :
    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

def INT_PTX_SREG_LANEMASK_EQ :
    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE :
    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT :
    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE :
    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT :
    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

def INT_PTX_SREG_CLOCK :
    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
// emits a "%" in front of the register name, and WARP_SZ is written without
// one, so the instruction is spelled out by hand.
def INT_PTX_SREG_WARPSIZE :
    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// `op` names the operation the fragment is used with.  Within this class it is
// only consulted by the Predicates selection below.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Int32Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // !cond yields the value of the first case whose condition holds, so the
  // order of the cases below matters.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],

    // non-f64 @ m8n8k4 (the f64 variant is handled near the top)
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],

    // A second "f64 @ m8n8k4" case used to follow here.  It could never be
    // selected because the identical condition appears earlier in this !cond
    // with the same result, so it has been dropped.

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call: the `ins` operator is
  // replaced with the intrinsic, and the memory operand classes imem, MEMri64
  // and MEMri are replaced with ADDRvar, ADDRri64 and ADDRri respectively.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the PatFrag, using the same operator and
  // operand-class substitutions as BuildPatternI.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
// The NVPTXInst base is instantiated with placeholder operands, asm string and
// pattern; derived classes override OutOperandList/InOperandList/AsmString.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up from its name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//

class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                              [!con((ins SrcOp:$src),
                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  // Same operands, with the intrinsic as the dag operator.
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" uses the generic predicate.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The trailing $ptx operand is printed through the "aligned" modifier in
  // the asm string below.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" uses the generic predicate.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants: every combination of layout, stride,
// address space and address operand class, for each fragment that
// NVVM_WMMA_LDST_SUPPORTED accepts.  The resulting records are collected in
// the MMA_LDSTs list.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list, extended with [hasSM<80>, hasPTX<71>] when
// b1op is ".and.popc".  The inputs are also exposed as the Op and Frag fields.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
  );
}

// WMMA.MMA
// Inputs are the A, B and C fragments; the D fragment is the output.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // When A is f16 only the D and C element types are spelled out; otherwise
  // all four (D, A, B, C) are.
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    1: "." # FragD.ptx_elt_type
       # "." # FragA.ptx_elt_type
       # "." # FragB.ptx_elt_type
       # "." # FragC.ptx_elt_type,
  );
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Create all wmma.mma variants accepted by NVVM_WMMA_SUPPORTED and collect
// them in the WMMAs list.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                              WMMA_REGINFO<op[1], "wmma.mma">,
                              WMMA_REGINFO<op[2], "wmma.mma">,
                              WMMA_REGINFO<op[3], "wmma.mma">,
                              layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
// Inputs are the A, B and C fragments; the D fragment is the output.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // All four element types are always spelled out, in D, A, B, C order.
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Create all mma variants accepted by NVVM_MMA_SUPPORTED and collect them in
// the MMAs list.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" uses the generic predicate.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
// The MMAs, WMMAs, MMA_LDSTs and LDMATRIXs lists are concatenated so a single
// loop emits one anonymous MMA_PAT per instruction record.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<inst>;
}

// mapa<suffix>.u32 / mapa<suffix>.u64 selected from intrinsic `Intr`.
//   _32  / _64  : $a and $d are 32-/64-bit registers, $b is a 32-bit register.
//   _32i / _64i : same, but $b is matched as an immediate.
// Every variant is gated on hasSM<90> and hasPTX<78>.
multiclass MAPA<string suffix, Intrinsic Intr> {
  def _32 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int32Regs:$a, Int32Regs:$b),
                      !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                      [(set Int32Regs:$d,
                            (Intr Int32Regs:$a, Int32Regs:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;

  def _32i : NVPTXInst<(outs Int32Regs:$d),
                       (ins Int32Regs:$a, i32imm:$b),
                       !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                       [(set Int32Regs:$d,
                             (Intr Int32Regs:$a, imm:$b))]>,
             Requires<[hasSM<90>, hasPTX<78>]>;

  def _64 : NVPTXInst<(outs Int64Regs:$d),
                      (ins Int64Regs:$a, Int32Regs:$b),
                      !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                      [(set Int64Regs:$d,
                            (Intr Int64Regs:$a, Int32Regs:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;

  def _64i : NVPTXInst<(outs Int64Regs:$d),
                       (ins Int64Regs:$a, i32imm:$b),
                       !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                       [(set Int64Regs:$d,
                             (Intr Int64Regs:$a, imm:$b))]>,
             Requires<[hasSM<90>, hasPTX<78>]>;
}

defm mapa
    : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster
    : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;


// getctarank<suffix>.u32 / getctarank<suffix>.u64 selected from `Intr`.
// The result is always a 32-bit register; _32 and _64 differ only in the
// register class of the $a input. Both are gated on hasSM<90> and hasPTX<78>.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  def _32 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int32Regs:$a),
                      !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
                      [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;

  def _64 : NVPTXInst<(outs Int32Regs:$d),
                      (ins Int64Regs:$a),
                      !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
                      [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
}

defm getctarank
    : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster
    : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;

// Moves %is_explicit_cluster into a predicate register for
// int_nvvm_is_explicit_cluster; gated on hasSM<90> and hasPTX<78>.
def is_explicit_cluster
    : NVPTXInst<(outs Int1Regs:$d),
                (ins),
                "mov.pred\t$d, %is_explicit_cluster;",
                [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
      Requires<[hasSM<90>, hasPTX<78>]>;

// setmaxnreg inc/dec intrinsics
// Everything inside this `let` is marked isConvergent.
let isConvergent = true in {
// Defines one anonymous instruction per `defm`. `Action` is spliced into the
// mnemonic; $reg_count is matched as a target immediate (timm) and carried as
// an i32imm operand. Gated on hasSM90a and hasPTX<80>.
multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
  def : NVPTXInst<(outs),
                  (ins i32imm:$reg_count),
                  !strconcat("setmaxnreg.", Action,
                             ".sync.aligned.u32 $reg_count;"),
                  [(Intr timm:$reg_count)]>,
        Requires<[hasSM90a, hasPTX<80>]>;
}

defm INT_SET_MAXNREG_INC
    : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
defm INT_SET_MAXNREG_DEC
    : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
} // isConvergent