//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Pattern leaves matching a floating-point immediate whose value, read back
// as a float (immFloat*) or as a double (immDouble*), compares equal to the
// constant in the leaf's name.
def immFloat0 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  const float Val = N->getValueAPF().convertToFloat();
  return Val == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  const double Val = N->getValueAPF().convertToDouble();
  return Val == 1.0;
}]>;

// Code fragments that test the address space of the memory node N by
// forwarding to ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  // Produces an i32 immediate holding Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  // Recursive definition: the names for n-1 followed by prefix # (n-1);
  // n == 0 yields the empty list.
  list<string> ret = !if(n,
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}

// Values iterated over for the `threadmask_imm` flag of SHFL_INSTR:
// both 0 and 1 for the sync variants, only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {

// bar.sync with a literal barrier 0, one register operand, or two register
// operands.
def INT_BARRIER0 :
  NVPTXInst<(outs), (ins),
            "bar.sync \t0;",
            [(int_nvvm_barrier0)]>;
def INT_BARRIERN :
  NVPTXInst<(outs), (ins Int32Regs:$src1),
            "bar.sync \t$src1;",
            [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER :
  NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
            "bar.sync \t$src1, $src2;",
            [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// bar.red.{popc,and,or} on barrier 0.  The i32 input is first turned into a
// predicate with setp.ne.u32; the and/or forms convert the predicate result
// back to an i32 0/1 with selp.u32.
def INT_BARRIER0_POPC :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t" #
            ".reg .pred \t%p1; \n\t" #
            "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
            "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
            "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t" #
            ".reg .pred \t%p1; \n\t" #
            ".reg .pred \t%p2; \n\t" #
            "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
            "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
            "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
            "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR :
  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
            "{{ \n\t" #
            ".reg .pred \t%p1; \n\t" #
            ".reg .pred \t%p2; \n\t" #
            "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
            "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
            "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
            "}}",
            [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

// bar.sync with an immediate barrier operand.
def INT_BAR_SYNC :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.sync \t$i;",
            [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, operand as immediate (_I) or register (_R).
def INT_BAR_WARP_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "bar.warp.sync \t$i;",
            [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

// barrier.sync with one operand, as immediate (_I) or register (_R).
def INT_BARRIER_SYNC_I :
  NVPTXInst<(outs), (ins i32imm:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync imm:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R :
  NVPTXInst<(outs), (ins Int32Regs:$i),
            "barrier.sync \t$i;",
            [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

// barrier.sync with id and count operands.  The two-letter suffix gives the
// kind of $id then $cnt: R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR :
  NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI :
  NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR :
  NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II :
  NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
            "barrier.sync \t$id, $cnt;",
            [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  Requires<[hasPTX<60>, hasSM<30>]>;

// Operand-less barrier.cluster.<variant> instruction selected from Intr.
// Preds defaults to [hasPTX<78>, hasSM<90>].
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]> :
  NVPTXInst<(outs), (ins), "barrier.cluster." # variant # ";", [(Intr)]>,
  Requires<Preds>;

def barrier_cluster_arrive :
  INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed :
  INT_BARRIER_CLUSTER<"arrive.relaxed",
                      int_nvvm_barrier_cluster_arrive_relaxed,
                      [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait :
  INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// One shfl[.sync].<mode>.b32 instruction.
//   sync            - emit the .sync form, which adds a $threadmask operand.
//   mode            - shuffle mode string placed in the mnemonic.
//   reg             - "i32" or "f32"; selects the register class and is part
//                     of the intrinsic name.
//   return_pred     - also produce an Int1Regs:$pred result ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                   - take the respective operand as i32imm instead of
//                     Int32Regs.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
  : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);

  // The intrinsic is looked up by its constructed name.
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // Operand order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
  );
  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));
  let AsmString = "shfl."
                  # !if(sync, "sync.", "")
                  # mode # ".b32\t"
                  # "$dst"
                  # !if(return_pred, "|$pred", "") # ", "
                  # "$src, $offset, $mask"
                  # !if(sync, ", $threadmask", "")
                  # ";"
                  ;

  // The selection pattern is derived from the operand lists: the output list
  // with `outs` replaced by `set`, followed by the input list with `ins`
  // replaced by the intrinsic; i32imm operands become `imm` in both.
  let Pattern = [!con(
    !foreach(tmp, OutOperandList,
             !subst(outs, set,
                    !subst(i32imm, imm, tmp))),
    (set !foreach(tmp, InOperandList,
                  !subst(ins, Intr,
                         !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of the SHFL_INSTR parameters.  The set of
// threadmask_imm values depends on `sync` (see THREADMASK_INFO).
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync,
                                 [hasSM<30>, hasPTX<60>],
                                 [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL    : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest),
                    (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest),
                    (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest,
                          (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL    : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY    : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI    : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32",
                                  int_nvvm_vote_ballot_sync>;

// match.any.sync.<ptxtype>
// The two-letter suffix gives the kind of $value then $mask:
// i = immediate, r = register.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype,
                          Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest),
                     (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set Int32Regs:$dest,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set Int32Regs:$dest,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32",
                                        int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64",
                                        int_nvvm_match_any_sync_i64, i64imm>;

// match.all.sync.<ptxtype> with an additional predicate result
// ("$dest|$pred").  Suffix letters as for MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype,
                           Intrinsic IntOp, Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype
                       # " \t$dest|$pred, $value, $mask;",
                     [(set Int32Regs:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32",
                                          int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64",
                                          int_nvvm_match_all_sync_i64p, i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst),
                  (ins Int32Regs:$src, Int32Regs:$mask),
                  "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
                  [(set Int32Regs:$dst,
                        (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// Operand-less instruction whose complete asm text is StrOp.
class MEMBAR<string StrOp, Intrinsic IntOP> :
  NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

def INT_FENCE_SC_CLUSTER :
  MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
  Requires<[hasPTX<78>, hasSM<90>]>;

//-----------------------------------
// Async
// Copy Functions
//-----------------------------------

// cp.async.mbarrier.arrive[.noinc][.shared].b64, with the barrier address in
// a 32-bit (_32) or 64-bit (_64) register.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                        # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                           int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                           int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

// cp.async.<cc>.shared.global with a fixed copy size `cpsize`.
//   _32 / _64     - 32-bit / 64-bit address registers, selected from Intrin.
//   _32s / _64s   - extra $src_size operand in a register, from IntrinS.
//   _32si / _64si - extra $src_size operand as an immediate, from IntrinS.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize,
                                    Intrinsic Intrin, Intrinsic IntrinS> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async." # cc # ".shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async." # cc # ".shared.global [$dst], [$src], "
                        # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs),
                       (ins Int32Regs:$dst, Int32Regs:$src,
                            Int32Regs:$src_size),
                       "cp.async." # cc # ".shared.global [$dst], [$src], "
                         # cpsize # ", $src_size;",
                       [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si : NVPTXInst<(outs),
                        (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
                        "cp.async." # cc # ".shared.global [$dst], [$src], "
                          # cpsize # ", $src_size;",
                        [(IntrinS Int32Regs:$dst, Int32Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs),
                       (ins Int64Regs:$dst, Int64Regs:$src,
                            Int32Regs:$src_size),
                       "cp.async." # cc # ".shared.global [$dst], [$src], "
                         # cpsize # ", $src_size;",
                       [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                 Int32Regs:$src_size)]>,
             Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si : NVPTXInst<(outs),
                        (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
                        "cp.async." # cc # ".shared.global [$dst], [$src], "
                          # cpsize # ", $src_size;",
                        [(IntrinS Int64Regs:$dst, Int64Regs:$src,
                                  imm:$src_size)]>,
              Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4",
                           int_nvvm_cp_async_ca_shared_global_4,
                           int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8",
                           int_nvvm_cp_async_ca_shared_global_8,
                           int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16",
                           int_nvvm_cp_async_ca_shared_global_16,
                           int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16",
                           int_nvvm_cp_async_cg_shared_global_16,
                           int_nvvm_cp_async_cg_shared_global_16_s>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins),
            "cp.async.commit_group;",
            [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// The group count is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n),
            "cp.async.wait_group $n;",
            [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins),
            "cp.async.wait_all;",
            [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------

// Every multiclass below defines a _32 and a _64 record that differ only in
// the register class used for $addr; AddrSpace is pasted into the mnemonic
// ("" or ".shared" in the instantiations that follow each multiclass).

// mbarrier.init<AddrSpace>.b64 [$addr], $count
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED :
  MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;

// mbarrier.inval<AddrSpace>.b64 [$addr]
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED :
  MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;

// mbarrier.arrive<AddrSpace>.b64 $state, [$addr]
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

// mbarrier.arrive.noComplete<AddrSpace>.b64 $state, [$addr], $count
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                             int_nvvm_mbarrier_arrive_noComplete_shared>;

// mbarrier.arrive_drop<AddrSpace>.b64 $state, [$addr]
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                        # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

// mbarrier.arrive_drop.noComplete<AddrSpace>.b64 $state, [$addr], $count
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                        # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

// mbarrier.test_wait<AddrSpace>.b64 $res, [$addr], $state
// The result is a predicate register.
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                        # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

// mbarrier.pending_count.b64 $res, $state
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
  NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
            "mbarrier.pending_count.b64 $res, $state;",
            [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
// One pattern is needed per operand order of the inner and outer call.

def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
576class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass, 577 NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []> 578 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), 579 OpcStr, 580 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>, 581 Requires<Preds>; 582 583// We need a full string for OpcStr here because we need to deal with the case 584// like INT_PTX_NATIVE_POWR_F. 585class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass, 586 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP, 587 list<Predicate> Preds = []> 588 : NVPTXInst<(outs t_regclass:$dst), 589 (ins s0_regclass:$src0, s1_regclass:$src1), 590 OpcStr, 591 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>, 592 Requires<Preds>; 593 594class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, 595 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, 596 NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []> 597 : NVPTXInst<(outs t_regclass:$dst), 598 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), 599 OpcStr, 600 [(set t_regclass:$dst, 601 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>, 602 Requires<Preds>; 603 604// 605// MISC 606// 607 608def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, 609 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; 610 611// 612// Min Max 613// 614 615def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, 616 Float32Regs, Float32Regs, int_nvvm_fmin_f>; 617def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", 618 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; 619def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;", 620 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f, 621 [hasPTX<70>, hasSM<80>]>; 622def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;", 623 Float32Regs, Float32Regs, Float32Regs, 
int_nvvm_fmin_ftz_nan_f, 624 [hasPTX<70>, hasSM<80>]>; 625def INT_NVVM_FMIN_XORSIGN_ABS_F : 626 F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;", 627 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f, 628 [hasPTX<72>, hasSM<86>]>; 629def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F : 630 F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", 631 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f, 632 [hasPTX<72>, hasSM<86>]>; 633def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F : 634 F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 635 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f, 636 [hasPTX<72>, hasSM<86>]>; 637def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F : 638 F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 639 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f, 640 [hasPTX<72>, hasSM<86>]>; 641 642def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, 643 Float32Regs, Float32Regs, int_nvvm_fmax_f>; 644def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", 645 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; 646def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;", 647 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f, 648 [hasPTX<70>, hasSM<80>]>; 649def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;", 650 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f, 651 [hasPTX<70>, hasSM<80>]>; 652def INT_NVVM_FMAX_XORSIGN_ABS_F : 653 F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;", 654 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f, 655 [hasPTX<72>, hasSM<86>]>; 656def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F : 657 F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;", 658 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f, 659 [hasPTX<72>, hasSM<86>]>; 660def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F : 661 F_MATH_2<"max.NaN.xorsign.abs.f32 
\t$dst, $src0, $src1;", 662 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f, 663 [hasPTX<72>, hasSM<86>]>; 664def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F : 665 F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;", 666 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f, 667 [hasPTX<72>, hasSM<86>]>; 668 669def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, 670 Float64Regs, Float64Regs, int_nvvm_fmin_d>; 671def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, 672 Float64Regs, Float64Regs, int_nvvm_fmax_d>; 673 674// 675// Min Max f16, f16x2, bf16, bf16x2 676// 677 678class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC, 679 list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> { 680 string Variant = V; 681 Intrinsic Intr = I; 682 NVPTXRegClass RegClass = RC; 683 list<Predicate> Predicates = Preds; 684} 685 686multiclass MIN_MAX<string IntName> { 687 foreach P = [ 688 MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, 689 int_nvvm_fmax_f16), Int16Regs>, 690 MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, 691 int_nvvm_fmax_ftz_f16), Int16Regs>, 692 MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, 693 int_nvvm_fmax_nan_f16), Int16Regs>, 694 MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), 695 int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>, 696 MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), 697 int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), 698 Int16Regs, [hasPTX<72>, hasSM<86>]>, 699 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), 700 int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), 701 Int16Regs, [hasPTX<72>, hasSM<86>]>, 702 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), 703 int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), 704 Int16Regs, [hasPTX<72>, hasSM<86>]>, 705 
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), 706 int_nvvm_fmin_ftz_nan_xorsign_abs_f16, 707 int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>, 708 MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, 709 int_nvvm_fmax_f16x2), Int32Regs>, 710 MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), 711 int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>, 712 MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"), 713 int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>, 714 MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"), 715 int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>, 716 MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"), 717 int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2), 718 Int32Regs, [hasPTX<72>, hasSM<86>]>, 719 MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"), 720 int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2), 721 Int32Regs, [hasPTX<72>, hasSM<86>]>, 722 MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), 723 int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2), 724 Int32Regs, [hasPTX<72>, hasSM<86>]>, 725 MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), 726 int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2, 727 int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2), 728 Int32Regs, [hasPTX<72>, hasSM<86>]>, 729 MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"), 730 int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>, 731 MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, 732 int_nvvm_fmax_nan_bf16), Int16Regs>, 733 MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"), 734 int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16), 735 Int16Regs, [hasPTX<72>, hasSM<86>]>, 736 MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"), 737 int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16), 738 
Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
      int_nvvm_fmax_bf16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_xorsign_abs_bf16x2,
      int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
    // One F_MATH_2 record per tuple. The mnemonic is IntName followed by the
    // variant suffix with every '_' replaced by '.'.
    def P.Variant : F_MATH_2<!strconcat(
      IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
      P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}

// Instantiate the "min" and "max" families.
// NOTE(review): "INT_NVVM_FMAN" looks like a typo for "INT_NVVM_FMAX". The
// spelling is kept because it prefixes every record generated below.
defm INT_NVVM_FMIN : MIN_MAX<"min">;
defm INT_NVVM_FMAN : MIN_MAX<"max">;

//
// Multiplication
//

// High half of an integer multiply (mul.hi), 32- and 64-bit.
def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding modifier, with and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo on signed / unsigned 32-bit registers.
def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

// Approximate f32 divide.
def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding modifier, with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding modifier.
def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

// Three-operand sad on signed / unsigned 32-bit registers.
def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor  Ceil
//

// floor is selected as a same-type CVT with the CvtRMI* mode operand.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// ceil is selected as a same-type CVT with the CvtRPI* mode operand.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Abs, Neg bf16, bf16x2
//

// bf16 values are carried in Int16Regs and bf16x2 in Int32Regs; all four
// records are predicated on hasPTX<70> and hasSM<80>.
def INT_NVVM_ABS_BF16
    : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs, Int16Regs,
               int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2
    : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs, Int32Regs,
               int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16
    : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs, Int16Regs,
               int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2
    : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, Int32Regs,
               int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;

//
// Round
//

// round is selected as a same-type CVT with the CvtRNI* mode operand.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

// trunc is selected as a same-type CVT with the CvtRZI* mode operand.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

// saturate is selected as a same-type CVT with the CvtSAT* mode operand.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2  Log2
//

// NOTE(review): the .f64 spellings of ex2.approx / lg2.approx below may not
// be accepted by ptxas -- confirm against the PTX ISA before relying on them.
def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16
    : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", Int16Regs, Int16Regs,
               int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2
    : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", Int32Regs, Int32Regs,
               int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;

def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin  Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// Describes one fma variant.
//   V     - record-name suffix; with '_' replaced by '.' it also forms the
//           mnemonic suffix (see FMA_INST).
//   I     - intrinsic the generated instruction is selected from.
//   RC    - register class used for the result and all three sources.
//   Preds - predicates attached to the instruction (none by default).
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Expands to one F_MATH_3 record per FMA_TUPLE listed below.
multiclass FMA_INST {
  foreach Tuple = [
    // f64
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (held in Int16Regs)
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // bf16 (held in Int16Regs)
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // f16x2 / bf16x2 (held in Int32Regs)
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>
  ] in {
    // Mnemonic: "fma" followed by the variant with '_' rewritten to '.'.
    def Tuple.Variant
        : F_MATH_3<"fma" # !subst("_", ".", Tuple.Variant)
                         # " \t$dst, $src0, $src1, $src2;",
                   Tuple.RegClass, Tuple.RegClass, Tuple.RegClass,
                   Tuple.RegClass, Tuple.Intr, Tuple.Predicates>;
  }
}

defm INT_NVVM_FMA : FMA_INST;

//
// Rcp
//

// f32 reciprocal with an explicit rounding modifier, with and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal with an explicit rounding modifier.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate reciprocal (.ftz forms only).
def INT_NVVM_RCP_APPROX_FTZ_F
    : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32 square root with an explicit rounding modifier, with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f maps onto one of the instructions above. The
// four patterns go from the most constrained predicate set to the
// unpredicated fallback; keep them in this order.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add with an explicit rounding modifier, with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add with an explicit rounding modifier.
def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//
// Each conversion intrinsic is selected as the matching CVT_<dst>_<src>
// instruction, with the rounding variant passed as the trailing mode operand.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// (f32, f32) -> bf16x2
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// (f32, f32) -> f16x2
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;

// f32 -> tf32; the result is produced in a 32-bit integer register.
// cvt.rna.tf32.f32 needs PTX ISA 7.0 and sm_80, so selection is gated on
// those features. Without the gate an older target would get PTX that the
// assembler rejects.
def CVT_tf32_f32 :
   NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
             "cvt.rna.tf32.f32 \t$dest, $a;",
             [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>,
   Requires<[hasPTX<70>, hasSM<80>]>;

// Pack two 32-bit registers into one f64 register with a vector mov.b64.
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Extract one 32-bit half of an f64 register. The unused half goes to a
// scratch register declared inside the braces of the emitted PTX.
def INT_NVVM_D2I_LO : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{$dst, %temp}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{%temp, $dst}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//

// Same-width moves between integer and floating-point register classes.
def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
  Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
  Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
  Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
  Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Common shape of every fns.b32 variant.
//   ins      - input operand dag; must name $mask, $base and $offset.
//   Operands - the int_nvvm_fns dag to match.
// Gated on hasPTX<60> and hasSM<30>.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
               "fns.b32 \t$dst, $mask, $base, $offset;",
               [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX<60>, hasSM<30>]>;

// One record per register/immediate combination of (mask, base, offset).
// In the suffix, 'r' means a register operand and 'i' an immediate one.
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                     (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                     (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.generic>;

// has 2 operands
// Emits a register form (`reg`) and an immediate form (`imm`) of
//   atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b;
//   ptrclass - register class of the address operand.
//   regclass - register class of the result and of the register operand.
//   IntOp    - fragment to match.
//   IMMType  - operand type of the immediate form.
//   IMM      - node that matches the immediate.
//   Pred     - predicates required by both forms.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp for Int32Regs (p32) and Int64Regs (p64)
// address registers.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted PTX negates $b into a scratch register and then issues the
// atomic on that. TypeStr is pasted after ".s" / ".u" here, so it is
// presumably a bare bit width such as "32" -- confirm at the use sites.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat(
      "{{ \n\t",
      ".reg \t.s", TypeStr, " temp; \n\t",
      "neg.s", TypeStr, " \ttemp, $b; \n\t",
      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
      "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_NEG_imp for Int32Regs (p32) and Int64Regs (p64)
// address registers.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
}

// has 3 operands
// Emits all four register/immediate combinations of ($b, $c) for
//   atom<SpaceStr><OpcStr><TypeStr> $dst, [$addr], $b, $c;
// reg = ($b reg, $c reg), imm1 = ($b imm, $c reg),
// imm2 = ($b reg, $c imm), imm3 = ($b imm, $c imm).
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_3_imp for Int32Regs (p32) and Int64Regs (p64)
// address registers.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-qualified fragments. Suffixes: _g = global, _s = shared,
// _gen = generic. The unsized atomic_load_add_{g,s,gen} fragments wrap
// atomic_load_fadd.
def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_fadd node:$a, node:$b)>;

// 32-bit integer atom.add in the global, shared and generic spaces (an
// empty SpaceStr selects the generic form).
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
  atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
  atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
  atomic_load_add_32_gen, i32imm, imm>;
// *_USE_G variants match the generic-address-space fragment but print the
// ".global" form of the instruction.
// NOTE(review): no predicate visible here separates a *_GEN definition from
// its *_GEN_*_USE_G twin — confirm which one instruction selection is
// expected to pick.
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".add", atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
  atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
  atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
  atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".add", atomic_load_add_64_gen, i64imm, imm>;

// Floating-point add: immediates are matched with fpimm.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
  atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
  atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
  atomic_load_add_gen, f32imm, fpimm>;

// The f64 forms are additionally gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
  atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
  atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
  atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction is emitted through F_ATOMIC_2_NEG with the ".add" opcode, i.e.
// as a negate followed by an atomic add.
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
  atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
  atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
  atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
  ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
  atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
  atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
  atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
  ".add", atomic_load_sub_64_gen>;

// atom_swap

def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_swap_64 node:$a, node:$b)>;

// Swap is printed as atom.exch with an untyped .b32 / .b64 suffix.
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
  atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
  atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
  atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
  atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
  atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
  atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".exch", atomic_swap_64_gen, i64imm, imm>;

// atom_max

// Signed (max) and unsigned (umax) fragments for each width and address
// space.
def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umax_64 node:$a, node:$b)>;

// Signed max uses the .s32/.s64 type suffix, unsigned max .u32/.u64.  All
// 64-bit forms carry the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
  atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
  atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
  atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
  atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_min

// Signed (min) and unsigned (umin) fragments for each width and address
// space (_g = global, _s = shared, _gen = generic).
def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_umin_64 node:$a, node:$b)>;

// Signed min uses .s32/.s64, unsigned min .u32/.u64.  All 64-bit forms carry
// the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
  ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
  ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
  atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
  ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
  ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
  atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
  ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
  atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
  ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
  ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
  ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
  atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
  ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_inc  atom_dec

// inc/dec wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics rather than a
// generic atomic node; only 32-bit forms are defined.
def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
  atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
  atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
  atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
  atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
  atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
  atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
  ".dec", atomic_load_dec_32_gen, i32imm, imm>;

// atom_and

def atomic_load_and_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_and_64 node:$a, node:$b)>;

// Bitwise ops use the untyped .b32/.b64 suffix; 64-bit forms carry hasSM<32>.
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
  atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
  atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
  atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
  atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
  atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
  atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_or

def atomic_load_or_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_or_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
  atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
  atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
  atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
  atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
  atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
  atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;

// atom_xor

def atomic_load_xor_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
  (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
  atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
  atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
  atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
  ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
  atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
  atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
  atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
  ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_cas

// Compare-and-swap fragments take three operands: address, $b and $c.
def atomic_cmp_swap_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas",
  atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas",
  atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas",
  atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32",
  ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas",
  atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas",
  atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
  atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
  ".cas", atomic_cmp_swap_64_gen, i64imm>;

// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented.
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is the default and is handled by the regular atomic
//  instructions.

// Base record shared by the two- and three-operand scoped atomics: a single
// `regclass` result named $result, the caller-supplied operand list `ins`,
// and one pattern that sets $result from `Operands`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result), ins,
              AsmStr,
              [(set regclass:$result, Operands)]>,
    Requires<Preds>;

// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-operand forms (32-bit and 64-bit address register) get
  // AddedComplexity = 1; the immediate forms below keep the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction), so the immediate's type has to be enforced with an
  // explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand counterpart of ATOM2P_impl: every register/immediate
// combination of $b and $c, each for 32-bit and 64-bit address registers.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Both $b and $c in registers: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  // Exactly one of $b / $c is an immediate: AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  // Both immediates: default complexity.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
//   asm:       atom[.<space>][.<scope>].<op>.<type>  — the space is omitted
//              when SpaceStr is "gen", the scope when ScopeStr is "gpu".
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>] — the scope
//              suffix is omitted only when ScopeStr is empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
                 !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", "_" # ScopeStr))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr),
                 !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", "_" # ScopeStr))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic address space ("gen") is instantiated.
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic address space ("gen") is instantiated.
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Every scoped variant has hasAtomScope appended to the caller's predicates.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics. They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// Per-operation type tables.  ATOM2S_impl / ATOM3S_impl arguments are:
//   OpStr, IntTypeStr (intrinsic name component), TypeStr (PTX type suffix),
//   register class, immediate operand, immediate node, immediate value type,
//   extra predicates.

// atom.add
// NOTE(review): _s32 and _u32 both pass IntTypeStr "i", so they are built
// against the same intrinsic name and differ only in the printed type suffix
// — confirm the duplicate is intended.
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
                          []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                          [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                          [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
// NOTE(review): the signed and unsigned variants of each width pass the same
// IntTypeStr ("i"), so each pair is built against the same intrinsic name and
// differs only in the PTX type suffix — confirm this is intended for min/max,
// where signedness changes the result.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                          [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// One instantiation per scoped operation; the string argument is the op name
// used in both the intrinsic name and the PTX mnemonic.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One ldu.global definition per addressing form.  TyStr supplies the type
// suffix together with the operand text.  All forms require hasLDU and carry
// an empty pattern list.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ldu.global." # TyStr,
                       []>, Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr,
                         []>, Requires<[hasLDU]>;
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ldu.global." # TyStr,
                       []>, Requires<[hasLDU]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ldu.global." # TyStr,
                      []>, Requires<[hasLDU]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ldu.global." # TyStr,
                        []>, Requires<[hasLDU]>;
}

// The i8 variant loads u8 into a 16-bit register class.
defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldu
// Two-result form: one "ldu.global.<TyStr>" instruction per address operand
// kind. TyStr carries the vector/type suffix and the operand text.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

// Four-result form of the elementized vector ldu.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

// 8-bit and 16-bit elements both use Int16Regs for their results.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ldg: one "ld.global.nc.<TyStr>" instruction per address operand kind
// (Int32Regs, Int64Regs, imemAny, MEMri, MEMri64), each gated on hasLDG.
// TyStr supplies the type suffix and the operand text.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldg
// Two-result form: one "ld.global.nc.<TyStr>" instruction per address operand
// kind.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// Four-result form of the elementized vector ldg.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// Non-generic -> generic address conversion ("cvta.<Str>").
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_6432 : 32-bit source widened with cvt.u64.u32 before the 64-bit cvta;
//               only available under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvt.u64.u32 \t%tmp, $src;\n\t"
                # " cvta." # Str # ".u64 \t$result, %tmp; }}",
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// Generic -> non-generic address conversion ("cvta.to.<Str>").
//   _yes      : 32-bit source and result.
//   _yes_64   : 64-bit source and result.
//   _yes_3264 : 64-bit cvta.to followed by cvt.u32.u64 to narrow the result;
//               only available under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                # " cvt.u32.u64 \t$result, %tmp; }}",
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Emitted as a plain register move in both pointer widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this. There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// 64-bit move of an imem operand into a register; no selection pattern.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// The asm strings below start with "//", so only a PTX comment is emitted.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

// "isspacep.<suffix>" for 32-bit and 64-bit address operands, producing an
// Int1Regs result. Preds (default: none) gates both variants.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
                      "isspacep." # suffix # "\t$d, $a;",
                      [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                      "isspacep." # suffix # "\t$d, $a;",
                      [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<Preds>;
}

defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;

// Special register reads
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;", []>;

// Each envreg read intrinsic is a MOV_SPECIAL of the matching ENVREG<n>.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;


// rotate builtin support

// 32-bit rotate via shf.l.wrap.b32 with $src passed as both data operands.
// Immediate-amount form.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]>;

// Register-amount form of the hardware 32-bit rotate.
def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]>;

// Without hasHWROT32, use the software rotate instructions instead.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]>;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;

// Helpers that split a 64-bit register into 32-bit halves and pack two 32-bit
// registers back into one. None of them has a selection pattern; they are
// referenced from the Pat<> definitions below.
let hasSideEffects = false in {
  // Unpacks $src with mov.b64 and keeps the first element of the pair.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []>;

  // Unpacks $src with mov.b64 and keeps the second element of the pair.
  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []>;

  // Packs {$lo, $hi} into a single 64-bit register.
  def PACK_TWO_INT32
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
                "mov.b64 \t$dst, {{$lo, $hi}};", []>;
}

// Swap of the two 32-bit halves: repack with the halves exchanged.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src))>;

// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
let hasSideEffects = false in {
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;", []>,
      Requires<[hasHWROT32]>;
}

// HW version of rotate 64
// Built from two 32-bit funnel shifts over the split halves, then repacked.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
// also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
//
// Every *_base class below yields four results ($r, $g, $b, $a) and takes a
// `texsamp` dag that supplies the $t and $s operands; the class appends the
// coordinate (and level/gradient) operands with !con. The matching multiclass
// emits the four register/immediate combinations for $t and $s:
//   _RR: $t reg, $s reg    _RI: $t reg, $s imm
//   _IR: $t imm, $s reg    _II: $t imm, $s imm

// 1D fetch: coordinate $x.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];"),
                []>;

multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D fetch with an explicit $lod operand.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;"),
                []>;

multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
  : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D fetch with explicit $gradx / $grady operands.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],",
                           " \\{$gradx\\}, \\{$grady\\};"),
                []>;

multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_GRAD
  : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD
  : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD
  : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch: an Int32Regs $l operand precedes the $x coordinate.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];"),
                []>;

multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32
  : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32
  : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32
  : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32
  : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32
  : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32
  : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch with an explicit $lod operand.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x\\}], $lod;"),
                []>;

multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch with explicit $gradx / $grady operands.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x,
                                   intype:$gradx, intype:$grady)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],",
                           " \\{$gradx\\}, \\{$grady\\};"),
                []>;

multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch: coordinates $x, $y.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];"),
                []>;

multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch with an explicit $lod operand.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$x, $y\\}], $lod;"),
                []>;

multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
  : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch with explicit two-component gradients
// ($gradx0/$gradx1 and $grady0/$grady1).
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y,
                                   intype:$gradx0, intype:$gradx1,
                                   intype:$grady0, intype:$grady1)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],",
                           " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};"),
                []>;

multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
  : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D array fetch: an Int32Regs $l operand precedes the $x, $y coordinates.
// NOTE(review): $y is emitted twice in the coordinate vector - presumably
// padding to the four-element vector the PTX a2d form expects; confirm
// against the PTX ISA before changing.
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                !strconcat(inst,
                           " \t\\{$r, $g, $b, $a\\},",
                           " [$t, $s, \\{$l, $x, $y, $y\\}];"),
                []>;

multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32
  :
TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; 2956defm TEX_2D_ARRAY_S32_S32 2957 : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; 2958defm TEX_2D_ARRAY_S32_F32 2959 : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 2960defm TEX_2D_ARRAY_U32_S32 2961 : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; 2962defm TEX_2D_ARRAY_U32_F32 2963 : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 2964 2965class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2966 NVPTXRegClass intype, dag texsamp> 2967 : NVPTXInst<(outs outtype:$r, outtype:$g, 2968 outtype:$b, outtype:$a), 2969 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 2970 intype:$lod)), 2971 inst # " \t\\{$r, $g, $b, $a\\}," 2972 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2973 []>; 2974 2975multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2976 NVPTXRegClass intype> { 2977 def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2978 (ins Int64Regs:$t, Int64Regs:$s)>; 2979 def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2980 (ins Int64Regs:$t, i64imm:$s)>; 2981 def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2982 (ins i64imm:$t, Int64Regs:$s)>; 2983 def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2984 (ins i64imm:$t, i64imm:$s)>; 2985} 2986 2987defm TEX_2D_ARRAY_F32_F32_LEVEL 2988 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 2989defm TEX_2D_ARRAY_S32_F32_LEVEL 2990 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 2991defm TEX_2D_ARRAY_U32_F32_LEVEL 2992 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 2993 2994class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 2995 NVPTXRegClass intype, dag texsamp> 2996 : NVPTXInst<(outs outtype:$r, outtype:$g, 2997 outtype:$b, outtype:$a), 2998 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 2999 intype:$gradx0, intype:$gradx1, 3000 intype:$grady0, intype:$grady1)), 
3001 inst # " \t\\{$r, $g, $b, $a\\}," 3002 " [$t, $s, \\{$l, $x, $y, $y\\}]," 3003 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 3004 []>; 3005 3006multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3007 NVPTXRegClass intype> { 3008 def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 3009 (ins Int64Regs:$t, Int64Regs:$s)>; 3010 def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 3011 (ins Int64Regs:$t, i64imm:$s)>; 3012 def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 3013 (ins i64imm:$t, Int64Regs:$s)>; 3014 def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 3015 (ins i64imm:$t, i64imm:$s)>; 3016} 3017 3018defm TEX_2D_ARRAY_F32_F32_GRAD 3019 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 3020defm TEX_2D_ARRAY_S32_F32_GRAD 3021 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 3022defm TEX_2D_ARRAY_U32_F32_GRAD 3023 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 3024 3025class TEX_3D_base<string inst, NVPTXRegClass outtype, 3026 NVPTXRegClass intype, dag texsamp> 3027 : NVPTXInst<(outs outtype:$r, outtype:$g, 3028 outtype:$b, outtype:$a), 3029 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), 3030 inst # " \t\\{$r, $g, $b, $a\\}," 3031 " [$t, $s, \\{$x, $y, $z, $z\\}];", 3032 []>; 3033 3034multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 3035 def _RR : TEX_3D_base<inst, outtype, intype, 3036 (ins Int64Regs:$t, Int64Regs:$s)>; 3037 def _RI : TEX_3D_base<inst, outtype, intype, 3038 (ins Int64Regs:$t, i64imm:$s)>; 3039 def _IR : TEX_3D_base<inst, outtype, intype, 3040 (ins i64imm:$t, Int64Regs:$s)>; 3041 def _II : TEX_3D_base<inst, outtype, intype, 3042 (ins i64imm:$t, i64imm:$s)>; 3043} 3044 3045defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3046defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; 3047defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, 
Int32Regs>; 3048defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3049defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; 3050defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3051 3052class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype, 3053 NVPTXRegClass intype, dag texsamp> 3054 : NVPTXInst<(outs outtype:$r, outtype:$g, 3055 outtype:$b, outtype:$a), 3056 !con(texsamp, (ins intype:$x, intype:$y, intype:$z, 3057 intype:$lod)), 3058 inst # " \t\\{$r, $g, $b, $a\\}," 3059 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 3060 []>; 3061 3062multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype, 3063 NVPTXRegClass intype> { 3064 def _RR : TEX_3D_LEVEL_base<inst, outtype, intype, 3065 (ins Int64Regs:$t, Int64Regs:$s)>; 3066 def _RI : TEX_3D_LEVEL_base<inst, outtype, intype, 3067 (ins Int64Regs:$t, i64imm:$s)>; 3068 def _IR : TEX_3D_LEVEL_base<inst, outtype, intype, 3069 (ins i64imm:$t, Int64Regs:$s)>; 3070 def _II : TEX_3D_LEVEL_base<inst, outtype, intype, 3071 (ins i64imm:$t, i64imm:$s)>; 3072} 3073 3074defm TEX_3D_F32_F32_LEVEL 3075 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3076defm TEX_3D_S32_F32_LEVEL 3077 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3078defm TEX_3D_U32_F32_LEVEL 3079 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3080 3081class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype, 3082 NVPTXRegClass intype, dag texsamp> 3083 : NVPTXInst<(outs outtype:$r, outtype:$g, 3084 outtype:$b, outtype:$a), 3085 !con(texsamp, (ins intype:$x, intype:$y, intype:$z, 3086 intype :$gradx0, intype:$gradx1, 3087 intype:$gradx2, intype:$grady0, 3088 intype:$grady1, intype:$grady2)), 3089 inst # " \t\\{$r, $g, $b, $a\\}," 3090 " [$t, $s, \\{$x, $y, $z, $z\\}]," 3091 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3092 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3093 []>; 3094 3095multiclass TEX_3D_GRAD<string inst, NVPTXRegClass 
outtype, 3096 NVPTXRegClass intype> { 3097 def _RR : TEX_3D_GRAD_base<inst, outtype, intype, 3098 (ins Int64Regs:$t, Int64Regs:$s)>; 3099 def _RI : TEX_3D_GRAD_base<inst, outtype, intype, 3100 (ins Int64Regs:$t, i64imm:$s)>; 3101 def _IR : TEX_3D_GRAD_base<inst, outtype, intype, 3102 (ins i64imm:$t, Int64Regs:$s)>; 3103 def _II : TEX_3D_GRAD_base<inst, outtype, intype, 3104 (ins i64imm:$t, i64imm:$s)>; 3105} 3106 3107defm TEX_3D_F32_F32_GRAD 3108 : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3109defm TEX_3D_S32_F32_GRAD 3110 : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3111defm TEX_3D_U32_F32_GRAD 3112 : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3113 3114class TEX_CUBE_base<string inst, NVPTXRegClass outtype, 3115 NVPTXRegClass intype, dag texsamp> 3116 : NVPTXInst<(outs outtype:$r, outtype:$g, 3117 outtype:$b, outtype:$a), 3118 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), 3119 inst # " \t\\{$r, $g, $b, $a\\}," 3120 " [$t, $s, \\{$x, $y, $z, $z\\}];", 3121 []>; 3122 3123multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 3124 def _RR : TEX_CUBE_base<inst, outtype, intype, 3125 (ins Int64Regs:$t, Int64Regs:$s)>; 3126 def _RI : TEX_CUBE_base<inst, outtype, intype, 3127 (ins Int64Regs:$t, i64imm:$s)>; 3128 def _IR : TEX_CUBE_base<inst, outtype, intype, 3129 (ins i64imm:$t, Int64Regs:$s)>; 3130 def _II : TEX_CUBE_base<inst, outtype, intype, 3131 (ins i64imm:$t, i64imm:$s)>; 3132} 3133 3134defm TEX_CUBE_F32_F32 3135 : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; 3136defm TEX_CUBE_S32_F32 3137 : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; 3138defm TEX_CUBE_U32_F32 3139 : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3140 3141class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 3142 NVPTXRegClass intype, dag texsamp> 3143 : NVPTXInst<(outs outtype:$r, outtype:$g, 3144 outtype:$b, outtype:$a), 3145 !con(texsamp, (ins intype:$x, 
intype:$y, intype:$z, 3146 intype:$lod)), 3147 inst # " \t\\{$r, $g, $b, $a\\}," 3148 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 3149 []>; 3150 3151multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 3152 NVPTXRegClass intype> { 3153 def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype, 3154 (ins Int64Regs:$t, Int64Regs:$s)>; 3155 def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype, 3156 (ins Int64Regs:$t, i64imm:$s)>; 3157 def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype, 3158 (ins i64imm:$t, Int64Regs:$s)>; 3159 def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype, 3160 (ins i64imm:$t, i64imm:$s)>; 3161} 3162 3163defm TEX_CUBE_F32_F32_LEVEL 3164 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>; 3165defm TEX_CUBE_S32_F32_LEVEL 3166 : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>; 3167defm TEX_CUBE_U32_F32_LEVEL 3168 : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3169 3170class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, 3171 NVPTXRegClass intype, dag texsamp> 3172 : NVPTXInst<(outs outtype:$r, outtype:$g, 3173 outtype:$b, outtype:$a), 3174 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 3175 intype:$z)), 3176 inst # " \t\\{$r, $g, $b, $a\\}," 3177 " [$t, $s, \\{$l, $x, $y, $z\\}];", 3178 []>; 3179 3180multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype, 3181 NVPTXRegClass intype> { 3182 def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype, 3183 (ins Int64Regs:$t, Int64Regs:$s)>; 3184 def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype, 3185 (ins Int64Regs:$t, i64imm:$s)>; 3186 def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype, 3187 (ins i64imm:$t, Int64Regs:$s)>; 3188 def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype, 3189 (ins i64imm:$t, i64imm:$s)>; 3190} 3191 3192defm TEX_CUBE_ARRAY_F32_F32 3193 : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; 3194defm TEX_CUBE_ARRAY_S32_F32 3195 : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, 
Float32Regs>; 3196defm TEX_CUBE_ARRAY_U32_F32 3197 : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; 3198 3199class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3200 NVPTXRegClass intype, dag texsamp> 3201 : NVPTXInst<(outs outtype:$r, outtype:$g, 3202 outtype:$b, outtype:$a), 3203 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 3204 intype:$z, intype:$lod)), 3205 inst # " \t\\{$r, $g, $b, $a\\}," 3206 " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 3207 []>; 3208 3209multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3210 NVPTXRegClass intype> { 3211 def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3212 (ins Int64Regs:$t, Int64Regs:$s)>; 3213 def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3214 (ins Int64Regs:$t, i64imm:$s)>; 3215 def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3216 (ins i64imm:$t, Int64Regs:$s)>; 3217 def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3218 (ins i64imm:$t, i64imm:$s)>; 3219} 3220 3221defm TEX_CUBE_ARRAY_F32_F32_LEVEL 3222 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 3223 Float32Regs, Float32Regs>; 3224defm TEX_CUBE_ARRAY_S32_F32_LEVEL 3225 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 3226 Int32Regs, Float32Regs>; 3227defm TEX_CUBE_ARRAY_U32_F32_LEVEL 3228 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 3229 Int32Regs, Float32Regs>; 3230 3231class TLD4_2D_base<string inst, NVPTXRegClass outtype, 3232 NVPTXRegClass intype, dag texsamp> 3233 : NVPTXInst<(outs outtype:$v0, outtype:$v1, 3234 outtype:$v2, outtype:$v3), 3235 !con(texsamp, (ins intype:$x, intype:$y)), 3236 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", 3237 []>; 3238 3239multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 3240 def _RR : TLD4_2D_base<inst, outtype, intype, 3241 (ins Int64Regs:$t, Int64Regs:$s)>; 3242 def _RI : TLD4_2D_base<inst, outtype, intype, 3243 (ins Int64Regs:$t, i64imm:$s)>; 3244 def _IR : 
TLD4_2D_base<inst, outtype, intype, 3245 (ins i64imm:$t, Int64Regs:$s)>; 3246 def _II : TLD4_2D_base<inst, outtype, intype, 3247 (ins i64imm:$t, i64imm:$s)>; 3248} 3249 3250defm TLD4_R_2D_F32_F32 3251 : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3252defm TLD4_G_2D_F32_F32 3253 : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3254defm TLD4_B_2D_F32_F32 3255 : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3256defm TLD4_A_2D_F32_F32 3257 : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3258 3259defm TLD4_R_2D_S32_F32 3260 : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3261defm TLD4_G_2D_S32_F32 3262 : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3263defm TLD4_B_2D_S32_F32 3264 : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3265defm TLD4_A_2D_S32_F32 3266 : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3267 3268defm TLD4_R_2D_U32_F32 3269 : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3270defm TLD4_G_2D_U32_F32 3271 : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3272defm TLD4_B_2D_U32_F32 3273 : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3274defm TLD4_A_2D_U32_F32 3275 : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3276 3277} 3278 3279 3280// texmode_unified 3281let IsTex = true, IsTexModeUnified = true in { 3282// Texture fetch instructions using handles 3283 3284class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype, 3285 NVPTXRegClass intype, dag tex> 3286 : NVPTXInst<(outs outtype:$r, outtype:$g, 3287 outtype:$b, outtype:$a), 3288 !con(tex, (ins intype:$x)), 3289 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3290 []>; 3291 3292multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype, 3293 NVPTXRegClass intype> { 3294 def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3295 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>; 3296} 3297 3298defm 
TEX_UNIFIED_1D_F32_S32 3299 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 3300defm TEX_UNIFIED_1D_F32_F32 3301 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3302defm TEX_UNIFIED_1D_S32_S32 3303 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 3304defm TEX_UNIFIED_1D_S32_F32 3305 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3306defm TEX_UNIFIED_1D_U32_S32 3307 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 3308defm TEX_UNIFIED_1D_U32_F32 3309 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3310 3311class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 3312 NVPTXRegClass intype, dag tex> 3313 : NVPTXInst<(outs outtype:$r, outtype:$g, 3314 outtype:$b, outtype:$a), 3315 !con(tex, (ins intype:$x, intype:$lod)), 3316 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", 3317 []>; 3318 3319multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype, 3320 NVPTXRegClass intype> { 3321 def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3322 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3323} 3324 3325defm TEX_UNIFIED_1D_F32_F32_LEVEL 3326 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3327defm TEX_UNIFIED_1D_S32_F32_LEVEL 3328 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3329defm TEX_UNIFIED_1D_U32_F32_LEVEL 3330 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3331 3332class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype, 3333 NVPTXRegClass intype, dag tex> 3334 : NVPTXInst<(outs outtype:$r, outtype:$g, 3335 outtype:$b, outtype:$a), 3336 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), 3337 inst # " \t\\{$r, $g, $b, $a\\}," 3338 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3339 []>; 3340 3341multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype, 3342 NVPTXRegClass intype> { 3343 
def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3344 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3345} 3346 3347defm TEX_UNIFIED_1D_F32_F32_GRAD 3348 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3349defm TEX_UNIFIED_1D_S32_F32_GRAD 3350 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3351defm TEX_UNIFIED_1D_U32_F32_GRAD 3352 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3353 3354class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 3355 NVPTXRegClass intype, dag tex> 3356 : NVPTXInst<(outs outtype:$r, outtype:$g, 3357 outtype:$b, outtype:$a), 3358 !con(tex, (ins Int32Regs:$l, intype:$x)), 3359 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", 3360 []>; 3361 3362multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype, 3363 NVPTXRegClass intype> { 3364 def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3365 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 3366} 3367 3368defm TEX_UNIFIED_1D_ARRAY_F32_S32 3369 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 3370defm TEX_UNIFIED_1D_ARRAY_F32_F32 3371 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 3372defm TEX_UNIFIED_1D_ARRAY_S32_S32 3373 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 3374defm TEX_UNIFIED_1D_ARRAY_S32_F32 3375 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 3376defm TEX_UNIFIED_1D_ARRAY_U32_S32 3377 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 3378defm TEX_UNIFIED_1D_ARRAY_U32_F32 3379 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 3380 3381class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3382 NVPTXRegClass intype, dag tex> 3383 : NVPTXInst<(outs outtype:$r, outtype:$g, 3384 outtype:$b, outtype:$a), 3385 !con(tex, (ins 
Int32Regs:$l, intype:$x, intype:$lod)), 3386 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", 3387 []>; 3388 3389multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3390 NVPTXRegClass intype> { 3391 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, 3392 (ins Int64Regs:$t)>; 3393 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, 3394 (ins i64imm:$t)>; 3395} 3396 3397defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL 3398 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", 3399 Float32Regs, Float32Regs>; 3400defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL 3401 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", 3402 Int32Regs, Float32Regs>; 3403defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3404 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", 3405 Int32Regs, Float32Regs>; 3406 3407class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3408 NVPTXRegClass intype, dag tex> 3409 : NVPTXInst<(outs outtype:$r, outtype:$g, 3410 outtype:$b, outtype:$a), 3411 !con(tex, (ins Int32Regs:$l, intype:$x, 3412 intype:$gradx, intype:$grady)), 3413 inst # " \t\\{$r, $g, $b, $a\\}," 3414 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3415 []>; 3416 3417multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3418 NVPTXRegClass intype> { 3419 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, 3420 (ins Int64Regs:$t)>; 3421 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, 3422 (ins i64imm:$t)>; 3423} 3424 3425defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD 3426 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", 3427 Float32Regs, Float32Regs>; 3428defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3429 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", 3430 Int32Regs, Float32Regs>; 3431defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3432 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", 3433 Int32Regs, Float32Regs>; 3434 3435class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 
3436 NVPTXRegClass intype, dag tex> 3437 : NVPTXInst<(outs outtype:$r, outtype:$g, 3438 outtype:$b, outtype:$a), 3439 !con(tex, (ins intype:$x, intype:$y)), 3440 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", 3441 []>; 3442 3443multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3444 NVPTXRegClass intype> { 3445 def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3446 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3447} 3448 3449defm TEX_UNIFIED_2D_F32_S32 3450 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; 3451defm TEX_UNIFIED_2D_F32_F32 3452 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3453defm TEX_UNIFIED_2D_S32_S32 3454 : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; 3455defm TEX_UNIFIED_2D_S32_F32 3456 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3457defm TEX_UNIFIED_2D_U32_S32 3458 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; 3459defm TEX_UNIFIED_2D_U32_F32 3460 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3461 3462class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype, 3463 NVPTXRegClass intype, dag tex> 3464 : NVPTXInst<(outs outtype:$r, outtype:$g, 3465 outtype:$b, outtype:$a), 3466 !con(tex, (ins intype:$x, intype:$y, intype:$lod)), 3467 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", 3468 []>; 3469 3470multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype, 3471 NVPTXRegClass intype> { 3472 def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3473 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3474} 3475 3476defm TEX_UNIFIED_2D_F32_F32_LEVEL 3477 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3478defm TEX_UNIFIED_2D_S32_F32_LEVEL 3479 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3480defm TEX_UNIFIED_2D_U32_F32_LEVEL 3481 : 
TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3482 3483class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, 3484 NVPTXRegClass intype, dag tex> 3485 : NVPTXInst<(outs outtype:$r, outtype:$g, 3486 outtype:$b, outtype:$a), 3487 !con(tex, (ins intype:$x, intype:$y, 3488 intype:$gradx0, intype:$gradx1, 3489 intype:$grady0, intype:$grady1)), 3490 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," 3491 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 3492 []>; 3493multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype, 3494 NVPTXRegClass intype> { 3495 def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3496 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3497} 3498 3499defm TEX_UNIFIED_2D_F32_F32_GRAD 3500 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3501defm TEX_UNIFIED_2D_S32_F32_GRAD 3502 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3503defm TEX_UNIFIED_2D_U32_F32_GRAD 3504 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3505 3506class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype, 3507 NVPTXRegClass intype, dag tex> 3508 : NVPTXInst<(outs outtype:$r, outtype:$g, 3509 outtype:$b, outtype:$a), 3510 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), 3511 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", 3512 []>; 3513multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype, 3514 NVPTXRegClass intype> { 3515 def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3516 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 3517} 3518 3519defm TEX_UNIFIED_2D_ARRAY_F32_S32 3520 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; 3521defm TEX_UNIFIED_2D_ARRAY_F32_F32 3522 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 3523defm TEX_UNIFIED_2D_ARRAY_S32_S32 
3524 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; 3525defm TEX_UNIFIED_2D_ARRAY_S32_F32 3526 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 3527defm TEX_UNIFIED_2D_ARRAY_U32_S32 3528 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; 3529defm TEX_UNIFIED_2D_ARRAY_U32_F32 3530 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 3531 3532class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3533 NVPTXRegClass intype, dag tex> 3534 : NVPTXInst<(outs outtype:$r, outtype:$g, 3535 outtype:$b, outtype:$a), 3536 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 3537 intype:$lod)), 3538 inst # " \t\\{$r, $g, $b, $a\\}," 3539 " [$t, \\{$l, $x, $y, $y\\}], $lod;", 3540 []>; 3541multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3542 NVPTXRegClass intype> { 3543 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, 3544 (ins Int64Regs:$t)>; 3545 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, 3546 (ins i64imm:$t)>; 3547} 3548 3549defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL 3550 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", 3551 Float32Regs, Float32Regs>; 3552defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL 3553 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", 3554 Int32Regs, Float32Regs>; 3555defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 3556 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", 3557 Int32Regs, Float32Regs>; 3558 3559class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3560 NVPTXRegClass intype, dag tex> 3561 : NVPTXInst<(outs outtype:$r, outtype:$g, 3562 outtype:$b, outtype:$a), 3563 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 3564 intype:$gradx0, intype:$gradx1, 3565 intype:$grady0, intype:$grady1)), 3566 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," 3567 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 3568 []>; 3569multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string 
inst, NVPTXRegClass outtype, 3570 NVPTXRegClass intype> { 3571 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, 3572 (ins Int64Regs:$t)>; 3573 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, 3574 (ins i64imm:$t)>; 3575} 3576 3577defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD 3578 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", 3579 Float32Regs, Float32Regs>; 3580defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD 3581 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", 3582 Int32Regs, Float32Regs>; 3583defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 3584 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", 3585 Int32Regs, Float32Regs>; 3586 3587class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype, 3588 NVPTXRegClass intype, dag tex> 3589 : NVPTXInst<(outs outtype:$r, outtype:$g, 3590 outtype:$b, outtype:$a), 3591 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 3592 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 3593 []>; 3594multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype, 3595 NVPTXRegClass intype> { 3596 def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3597 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>; 3598} 3599 3600defm TEX_UNIFIED_3D_F32_S32 3601 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; 3602defm TEX_UNIFIED_3D_F32_F32 3603 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3604defm TEX_UNIFIED_3D_S32_S32 3605 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; 3606defm TEX_UNIFIED_3D_S32_F32 3607 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3608defm TEX_UNIFIED_3D_U32_S32 3609 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; 3610defm TEX_UNIFIED_3D_U32_F32 3611 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3612 3613class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype, 3614 NVPTXRegClass intype, dag tex> 3615 : NVPTXInst<(outs outtype:$r, outtype:$g, 3616 
outtype:$b, outtype:$a), 3617 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3618 inst # " \t\\{$r, $g, $b, $a\\}," 3619 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3620 []>; 3621multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, 3622 NVPTXRegClass intype> { 3623 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3624 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3625} 3626 3627defm TEX_UNIFIED_3D_F32_F32_LEVEL 3628 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3629defm TEX_UNIFIED_3D_S32_F32_LEVEL 3630 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3631defm TEX_UNIFIED_3D_U32_F32_LEVEL 3632 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3633 3634class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, 3635 NVPTXRegClass intype, dag tex> 3636 : NVPTXInst<(outs outtype:$r, outtype:$g, 3637 outtype:$b, outtype:$a), 3638 !con(tex, (ins intype:$x, intype:$y, intype:$z, 3639 intype:$gradx0, intype:$gradx1, 3640 intype:$gradx2, intype:$grady0, 3641 intype:$grady1, intype:$grady2)), 3642 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 3643 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3644 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3645 []>; 3646multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, 3647 NVPTXRegClass intype> { 3648 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3649 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3650} 3651 3652defm TEX_UNIFIED_3D_F32_F32_GRAD 3653 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3654defm TEX_UNIFIED_3D_S32_F32_GRAD 3655 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3656defm TEX_UNIFIED_3D_U32_F32_GRAD 3657 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3658 3659class 
TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of the cube fetch.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Only f32 coordinates are instantiated for cube fetches here.
defm TEX_UNIFIED_CUBE_F32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Unified-mode cube texture fetch with an explicit $lod operand.
// As in the non-level form, $z is printed twice in the coordinate braces.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of the
// level-of-detail cube fetch.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
                             Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
                             Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
                             Int32Regs, Float32Regs>;

class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass
outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of the cube-array
// fetch. The extra operand $l is always an Int32Regs value and is printed
// first inside the coordinate braces.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Unified-mode cube-array texture fetch with an explicit $lod operand.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                               intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$l, $x, $y, $z\\}], $lod;",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of the
// level-of-detail cube-array fetch.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                                   Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                                   Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
    :
TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                                   Int32Regs, Float32Regs>;

// Unified-mode tld4 on a 2D texture.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class of the four results $v0..$v3.
//   intype  - register class of the coordinates $x, $y.
//   tex     - dag that supplies the texture handle operand $t.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
                      outtype:$v2, outtype:$v3),
                !con(tex, (ins intype:$x, intype:$y)),
                inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of tld4.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tld4.{r,g,b,a} with f32 results.
defm TLD4_UNIFIED_R_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// tld4.{r,g,b,a} with s32 results.
defm TLD4_UNIFIED_R_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// tld4.{r,g,b,a} with u32 results.
defm TLD4_UNIFIED_R_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

}



//=== Surface load instructions

let IsSuld = true in {

// Single-result load from a 1D surface.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class of the loaded value $r.
//   surf    - dag that supplies the surface handle operand $s.
// The coordinate $x is always an Int32Regs value.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// The b8 and b16 forms both place their result in Int16Regs.
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// Single-result load from a 1D surface array; $l precedes $x in the
// coordinate braces.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_I8_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp",
Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// Single-result load from a 2D surface.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class of the loaded value $r.
//   surf    - dag that supplies the surface handle operand $s.
// Coordinates $x and $y are Int32Regs values.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

// The b8 and b16 forms both place their result in Int16Regs.
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero",
Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// Single-result load from a 2D surface array.
// Operands are $l, $x, $y (all Int32Regs); the assembly string prints $y
// twice inside the coordinate braces.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_I8_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_I8_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_I8_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// Single-result load from a 3D surface.
// Operands are $x, $y, $z (all Int32Regs); the assembly string prints $z
// twice inside the coordinate braces.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

defm
SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}

// Every record in this scope has two results ($r, $g) and IsSuld set to 2.
let IsSuld = 2 in {

// Two-result (.v2) load from a 1D surface.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class shared by both results.
//   surf    - dag that supplies the surface handle operand $s.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V2I8_CLAMP  : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP  : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO :
SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// Two-result (.v2) load from a 1D surface array; $l precedes $x in the
// coordinate braces.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

// Two-result (.v2) load from a 2D surface.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    :
NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours of the two-result
// 2D surface load.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V2I8_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;

// Two-result (.v2) load from a 2D surface array.
// Operands are $l, $x, $y (all Int32Regs); the assembly string prints $y
// twice inside the coordinate braces.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP
    :
SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// Two-result (.v2) load from a 3D surface.
// Operands are $x, $y, $z (all Int32Regs); the assembly string prints $z
// twice inside the coordinate braces.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP :
SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

}

// Every record in this scope has four results ($r, $g, $b, $a) and IsSuld
// set to 3.
let IsSuld = 3 in {

// Four-result (.v4) load from a 1D surface.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class shared by all four results.
//   surf    - dag that supplies the surface handle operand $s.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// Only b8, b16 and b32 element sizes are instantiated for the .v4 forms.
defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// Four-result (.v4) load from a 1D surface array; $l precedes $x in the
// coordinate braces.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def
_R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// Four-result (.v4) load from a 2D surface.
//   inst    - PTX mnemonic placed in front of the operand list.
//   outtype - register class shared by all four results.
//   surf    - dag that supplies the surface handle operand $s.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO :
SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// Four-result (.v4) load from a 2D surface array.
// Operands are $l, $x, $y (all Int32Regs); the assembly string prints $y
// twice inside the coordinate braces.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// Four-result (.v4) load from a 3D surface.
// Operands are $x, $y, $z (all Int32Regs); the assembly string prints $z
// twice inside the coordinate braces.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins
Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V4I8_CLAMP  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

}

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query writes a 32-bit result $d. The _R form takes the handle $a in a
// 64-bit register, the _I form takes it as a 64-bit immediate. None of the
// instructions carries a selection pattern of its own.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_order.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_order.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_data_type.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_data_type.b32 \t$d, [$a];",
                []>;
def TXQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.width.b32 \t$d, [$a];",
                []>;
def TXQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.width.b32 \t$d, [$a];",
                []>;
def TXQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.height.b32 \t$d, [$a];",
                []>;
def TXQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.height.b32 \t$d, [$a];",
                []>;
def TXQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.depth.b32 \t$d, [$a];",
                []>;
def TXQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.depth.b32 \t$d, [$a];",
                []>;
def TXQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.array_size.b32 \t$d, [$a];",
                []>;
def TXQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.array_size.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_SAMPLES_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_samples.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_SAMPLES_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_samples.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_MIPMAP_LEVELS_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_MIPMAP_LEVELS_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];",
                []>;
}

// Map each txq intrinsic onto the register-handle (_R) instruction.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

let IsSurfTexQuery =
true in {
// Each query writes a 32-bit result $d. The _R form takes the handle $a in a
// 64-bit register, the _I form takes it as a 64-bit immediate.
def SUQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_order.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_order.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_data_type.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_data_type.b32 \t$d, [$a];",
                []>;
def SUQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.width.b32 \t$d, [$a];",
                []>;
def SUQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.width.b32 \t$d, [$a];",
                []>;
def SUQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.height.b32 \t$d, [$a];",
                []>;
def SUQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.height.b32 \t$d, [$a];",
                []>;
def SUQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.depth.b32 \t$d, [$a];",
                []>;
def SUQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.depth.b32 \t$d, [$a];",
                []>;
def SUQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.array_size.b32 \t$d, [$a];",
                []>;
def SUQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.array_size.b32 \t$d, [$a];",
                []>;
}

// Map each suq intrinsic onto the register-handle (_R) instruction.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
(SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Each istypep instruction writes a predicate (Int1Regs) result $d for the
// 64-bit handle $a and is selected directly from its intrinsic.
def ISTYPEP_SAMPLER
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.samplerref \t$d, $a;",
                [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.surfref \t$d, $a;",
                [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.texref \t$d, $a;",
                [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;

//===- Surface Stores -----------------------------------------------------===//

let IsSust = true in {

// Single-value store to a 1D surface; the instruction has no results.
//   inst   - PTX mnemonic placed in front of the operand list.
//   intype - register class of the stored value $r.
//   surf   - dag that supplies the surface handle operand $s.
// The coordinate $x is always an Int32Regs value.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r)),
                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
                []>;

// Register-handle (_R) and immediate-handle (_I) flavours.
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

// The b8 and b16 forms both take their value from Int16Regs.
defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

defm SUST_B_1D_B8_ZERO :
SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; 4436defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; 4437defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; 4438defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; 4439 4440defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; 4441defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; 4442defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; 4443 4444class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4445 : NVPTXInst<(outs), 4446 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), 4447 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", 4448 []>; 4449multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { 4450 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4451 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; 4452} 4453 4454defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; 4455defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; 4456defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; 4457defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; 4458 4459defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; 4460defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; 4461defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; 4462defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; 4463 4464defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; 4465defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; 4466defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; 4467defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; 4468 4469defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; 4470defm SUST_P_1D_V2B16_TRAP : 
SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; 4471defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; 4472 4473class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4474 : NVPTXInst<(outs), 4475 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, 4476 intype:$b, intype:$a)), 4477 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4478 []>; 4479multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { 4480 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4481 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; 4482} 4483 4484defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; 4485defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; 4486defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 4487 4488defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 4489defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 4490defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 4491 4492defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 4493defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 4494defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 4495 4496defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 4497defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; 4498defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 4499 4500class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4501 : NVPTXInst<(outs), 4502 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 4503 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4504 []>; 4505multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 4506 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4507 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4508} 4509 
// ---- 1D array surface stores, one data component (geometry "a1d") ---------
// sust.b is instantiated for b8/b16/b32/b64 with .clamp, .trap and .zero;
// sust.p is instantiated for b8/b16/b32 with .trap only.
// 8-bit and 16-bit data both use Int16Regs for the data operand.
defm SUST_B_1D_ARRAY_B8_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;

// ---- 1D array surface stores, two data components --------------------------
// Operands: whatever `surf` contributes (it must supply $s), then the
// coordinates $idx, $x and the data registers $r, $g.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x,
                          intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_ARRAY_V2B8_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;

// ---- 1D array surface stores, four data components -------------------------
// Same operand layout as the V2 form, with data registers $r, $g, $b, $a.
// Only b8/b16/b32 element sizes are instantiated below.
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_ARRAY_V4B8_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

defm SUST_P_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;

// ---- 2D surface stores, one data component ---------------------------------
// Operands: the `surf` dag (supplies $s), coordinates $x, $y, data register $r.
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;

// ---- 2D surface stores, two data components --------------------------------
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g)),
                !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;

// ---- 2D surface stores, four data components -------------------------------
// Only b8/b16/b32 element sizes are instantiated below.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;

// ---- 2D array surface stores, one data component (geometry "a2d") ----------
// NOTE(review): $y is printed twice in the coordinate list. This looks like
// deliberate padding of the coordinate vector to four elements rather than a
// typo -- confirm against the PTX ISA description of sust before changing it.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r)),
                !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_B8_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP :
    SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO :
    SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_B8_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP :
    SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;

// ---- 2D array surface stores, two data components --------------------------
// The coordinate list repeats $y exactly as in the scalar a2d form above.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V2B8_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;

// ---- 2D array surface stores, four data components -------------------------
// The coordinate list repeats $y exactly as in the scalar a2d form above.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

// Instantiations of the four-component 2D array stores. Only b8/b16/b32
// element sizes are instantiated; sust.p only with .trap.
defm SUST_B_2D_ARRAY_V4B8_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;

// ---- 3D surface stores, one data component ---------------------------------
// Operands: the `surf` dag (supplies $s), coordinates $x, $y, $z, data $r.
// NOTE(review): $z is printed twice in the coordinate list. This looks like
// deliberate padding of the coordinate vector to four elements rather than a
// typo -- confirm against the PTX ISA description of sust before changing it.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r)),
                !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
// Remaining scalar 3D store instantiations (b8 .trap is defined just above).
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;

// ---- 3D surface stores, two data components --------------------------------
// NOTE(review): $z is printed twice in the coordinate list. This looks like
// deliberate padding of the coordinate vector to four elements rather than a
// typo -- confirm against the PTX ISA description of sust before changing it.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g)),
                !strconcat(inst,
                           " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;

// ---- 3D surface stores, four data components -------------------------------
// The coordinate list repeats $z exactly as in the V2 form above.
// Only b8/b16/b32 element sizes are instantiated below.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
                !strconcat(
                    inst,
                    " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
                []>;

// _R takes $s as an Int64Regs operand, _I takes $s as an i64imm operand.
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;

} // let IsSust = true

// Surface store selection patterns.
// These live outside the instruction definitions because embedding them
// there makes TableGen report type errors (the reason is not understood).
// Each pattern maps one intrinsic onto the register-handle (_R) variant of
// the matching instruction, forwarding the operands in the same order.

// .clamp variant

// 1D
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 1D array ($l is bound to the instruction's second operand, $idx)
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
               Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 2D
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_clamp
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP_R
               Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
               Int64Regs:$r)>;
//-----------------------------------
// Selection patterns for the int_nvvm_sust_* intrinsics.
//
// Every pattern in this section has the same shape: the intrinsic's operands
// are forwarded, unchanged and in the same order, to the matching SUST_*_R
// instruction.  The helper classes below capture that shape once per
// coordinate layout and per number of stored values:
//
//   layout     coordinate operands (all Int32Regs)
//   1D         $x
//   1D_ARRAY   $l, $x
//   2D         $x, $y
//   2D_ARRAY   $l, $x, $y
//   3D         $x, $y, $z
//
//   suffix     stored values (all of register class `rc`)
//   S          $r
//   V2         $r, $g
//   V4         $r, $g, $b, $a
//
// $s is always an Int64Regs operand (presumably the surface handle -- confirm
// against the intrinsic definitions).  Both the 8-bit and the 16-bit element
// forms pass their values in Int16Regs.
//-----------------------------------

// --- 1D ---
class SustSelPat_1D_S<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r)>;
class SustSelPat_1D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g)>;
class SustSelPat_1D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a)>;

// --- 1D array ---
class SustSelPat_1D_ARRAY_S<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r)>;
class SustSelPat_1D_ARRAY_V2<Intrinsic intr, Instruction inst,
                             RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g)>;
class SustSelPat_1D_ARRAY_V4<Intrinsic intr, Instruction inst,
                             RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// --- 2D ---
class SustSelPat_2D_S<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r)>;
class SustSelPat_2D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g)>;
class SustSelPat_2D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// --- 2D array ---
class SustSelPat_2D_ARRAY_S<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, rc:$r)>;
class SustSelPat_2D_ARRAY_V2<Intrinsic intr, Instruction inst,
                             RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g)>;
class SustSelPat_2D_ARRAY_V4<Intrinsic intr, Instruction inst,
                             RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// --- 3D ---
class SustSelPat_3D_S<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, rc:$r)>;
class SustSelPat_3D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g)>;
class SustSelPat_3D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g, rc:$b, rc:$a)>;


// int_nvvm_sust_b_*_clamp: remaining 2D vector forms, then 2D array and 3D.
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i8_clamp,
      SUST_B_2D_V2B8_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i16_clamp,
      SUST_B_2D_V2B16_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i32_clamp,
      SUST_B_2D_V2B32_CLAMP_R, Int32Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i64_clamp,
      SUST_B_2D_V2B64_CLAMP_R, Int64Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i8_clamp,
      SUST_B_2D_V4B8_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i16_clamp,
      SUST_B_2D_V4B16_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i32_clamp,
      SUST_B_2D_V4B32_CLAMP_R, Int32Regs>;

def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i8_clamp,
      SUST_B_2D_ARRAY_B8_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i16_clamp,
      SUST_B_2D_ARRAY_B16_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i32_clamp,
      SUST_B_2D_ARRAY_B32_CLAMP_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i64_clamp,
      SUST_B_2D_ARRAY_B64_CLAMP_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i8_clamp,
      SUST_B_2D_ARRAY_V2B8_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i16_clamp,
      SUST_B_2D_ARRAY_V2B16_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i32_clamp,
      SUST_B_2D_ARRAY_V2B32_CLAMP_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i64_clamp,
      SUST_B_2D_ARRAY_V2B64_CLAMP_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i8_clamp,
      SUST_B_2D_ARRAY_V4B8_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i16_clamp,
      SUST_B_2D_ARRAY_V4B16_CLAMP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i32_clamp,
      SUST_B_2D_ARRAY_V4B32_CLAMP_R, Int32Regs>;

def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i8_clamp,
      SUST_B_3D_B8_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i16_clamp,
      SUST_B_3D_B16_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i32_clamp,
      SUST_B_3D_B32_CLAMP_R, Int32Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i64_clamp,
      SUST_B_3D_B64_CLAMP_R, Int64Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i8_clamp,
      SUST_B_3D_V2B8_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i16_clamp,
      SUST_B_3D_V2B16_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i32_clamp,
      SUST_B_3D_V2B32_CLAMP_R, Int32Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i64_clamp,
      SUST_B_3D_V2B64_CLAMP_R, Int64Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i8_clamp,
      SUST_B_3D_V4B8_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i16_clamp,
      SUST_B_3D_V4B16_CLAMP_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i32_clamp,
      SUST_B_3D_V4B32_CLAMP_R, Int32Regs>;


// .trap variant
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i8_trap,
      SUST_B_1D_B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i16_trap,
      SUST_B_1D_B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i32_trap,
      SUST_B_1D_B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i64_trap,
      SUST_B_1D_B64_TRAP_R, Int64Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i8_trap,
      SUST_B_1D_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i16_trap,
      SUST_B_1D_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i32_trap,
      SUST_B_1D_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i64_trap,
      SUST_B_1D_V2B64_TRAP_R, Int64Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i8_trap,
      SUST_B_1D_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i16_trap,
      SUST_B_1D_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i32_trap,
      SUST_B_1D_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i8_trap,
      SUST_B_1D_ARRAY_B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i16_trap,
      SUST_B_1D_ARRAY_B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i32_trap,
      SUST_B_1D_ARRAY_B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i64_trap,
      SUST_B_1D_ARRAY_B64_TRAP_R, Int64Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i8_trap,
      SUST_B_1D_ARRAY_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i16_trap,
      SUST_B_1D_ARRAY_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i32_trap,
      SUST_B_1D_ARRAY_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i64_trap,
      SUST_B_1D_ARRAY_V2B64_TRAP_R, Int64Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i8_trap,
      SUST_B_1D_ARRAY_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i16_trap,
      SUST_B_1D_ARRAY_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i32_trap,
      SUST_B_1D_ARRAY_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i8_trap,
      SUST_B_2D_B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i16_trap,
      SUST_B_2D_B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i32_trap,
      SUST_B_2D_B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i64_trap,
      SUST_B_2D_B64_TRAP_R, Int64Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i8_trap,
      SUST_B_2D_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i16_trap,
      SUST_B_2D_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i32_trap,
      SUST_B_2D_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i64_trap,
      SUST_B_2D_V2B64_TRAP_R, Int64Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i8_trap,
      SUST_B_2D_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i16_trap,
      SUST_B_2D_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i32_trap,
      SUST_B_2D_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i8_trap,
      SUST_B_2D_ARRAY_B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i16_trap,
      SUST_B_2D_ARRAY_B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i32_trap,
      SUST_B_2D_ARRAY_B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i64_trap,
      SUST_B_2D_ARRAY_B64_TRAP_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i8_trap,
      SUST_B_2D_ARRAY_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i16_trap,
      SUST_B_2D_ARRAY_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i32_trap,
      SUST_B_2D_ARRAY_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i64_trap,
      SUST_B_2D_ARRAY_V2B64_TRAP_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i8_trap,
      SUST_B_2D_ARRAY_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i16_trap,
      SUST_B_2D_ARRAY_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i32_trap,
      SUST_B_2D_ARRAY_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i8_trap,
      SUST_B_3D_B8_TRAP_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i16_trap,
      SUST_B_3D_B16_TRAP_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i32_trap,
      SUST_B_3D_B32_TRAP_R, Int32Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i64_trap,
      SUST_B_3D_B64_TRAP_R, Int64Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i8_trap,
      SUST_B_3D_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i16_trap,
      SUST_B_3D_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i32_trap,
      SUST_B_3D_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i64_trap,
      SUST_B_3D_V2B64_TRAP_R, Int64Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i8_trap,
      SUST_B_3D_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i16_trap,
      SUST_B_3D_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i32_trap,
      SUST_B_3D_V4B32_TRAP_R, Int32Regs>;


// .zero variant
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i8_zero,
      SUST_B_1D_B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i16_zero,
      SUST_B_1D_B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i32_zero,
      SUST_B_1D_B32_ZERO_R, Int32Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_b_1d_i64_zero,
      SUST_B_1D_B64_ZERO_R, Int64Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i8_zero,
      SUST_B_1D_V2B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i16_zero,
      SUST_B_1D_V2B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i32_zero,
      SUST_B_1D_V2B32_ZERO_R, Int32Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_b_1d_v2i64_zero,
      SUST_B_1D_V2B64_ZERO_R, Int64Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i8_zero,
      SUST_B_1D_V4B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i16_zero,
      SUST_B_1D_V4B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_b_1d_v4i32_zero,
      SUST_B_1D_V4B32_ZERO_R, Int32Regs>;

def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i8_zero,
      SUST_B_1D_ARRAY_B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i16_zero,
      SUST_B_1D_ARRAY_B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i32_zero,
      SUST_B_1D_ARRAY_B32_ZERO_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_b_1d_array_i64_zero,
      SUST_B_1D_ARRAY_B64_ZERO_R, Int64Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i8_zero,
      SUST_B_1D_ARRAY_V2B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i16_zero,
      SUST_B_1D_ARRAY_V2B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i32_zero,
      SUST_B_1D_ARRAY_V2B32_ZERO_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i64_zero,
      SUST_B_1D_ARRAY_V2B64_ZERO_R, Int64Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i8_zero,
      SUST_B_1D_ARRAY_V4B8_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i16_zero,
      SUST_B_1D_ARRAY_V4B16_ZERO_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i32_zero,
      SUST_B_1D_ARRAY_V4B32_ZERO_R, Int32Regs>;

def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i8_zero,
      SUST_B_2D_B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i16_zero,
      SUST_B_2D_B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i32_zero,
      SUST_B_2D_B32_ZERO_R, Int32Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_b_2d_i64_zero,
      SUST_B_2D_B64_ZERO_R, Int64Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i8_zero,
      SUST_B_2D_V2B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i16_zero,
      SUST_B_2D_V2B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i32_zero,
      SUST_B_2D_V2B32_ZERO_R, Int32Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_b_2d_v2i64_zero,
      SUST_B_2D_V2B64_ZERO_R, Int64Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i8_zero,
      SUST_B_2D_V4B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i16_zero,
      SUST_B_2D_V4B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_b_2d_v4i32_zero,
      SUST_B_2D_V4B32_ZERO_R, Int32Regs>;

def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i8_zero,
      SUST_B_2D_ARRAY_B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i16_zero,
      SUST_B_2D_ARRAY_B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i32_zero,
      SUST_B_2D_ARRAY_B32_ZERO_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_S<int_nvvm_sust_b_2d_array_i64_zero,
      SUST_B_2D_ARRAY_B64_ZERO_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i8_zero,
      SUST_B_2D_ARRAY_V2B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i16_zero,
      SUST_B_2D_ARRAY_V2B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i32_zero,
      SUST_B_2D_ARRAY_V2B32_ZERO_R, Int32Regs>;
def : SustSelPat_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i64_zero,
      SUST_B_2D_ARRAY_V2B64_ZERO_R, Int64Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i8_zero,
      SUST_B_2D_ARRAY_V4B8_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i16_zero,
      SUST_B_2D_ARRAY_V4B16_ZERO_R, Int16Regs>;
def : SustSelPat_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i32_zero,
      SUST_B_2D_ARRAY_V4B32_ZERO_R, Int32Regs>;

def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i8_zero,
      SUST_B_3D_B8_ZERO_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i16_zero,
      SUST_B_3D_B16_ZERO_R, Int16Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i32_zero,
      SUST_B_3D_B32_ZERO_R, Int32Regs>;
def : SustSelPat_3D_S<int_nvvm_sust_b_3d_i64_zero,
      SUST_B_3D_B64_ZERO_R, Int64Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i8_zero,
      SUST_B_3D_V2B8_ZERO_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i16_zero,
      SUST_B_3D_V2B16_ZERO_R, Int16Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i32_zero,
      SUST_B_3D_V2B32_ZERO_R, Int32Regs>;
def : SustSelPat_3D_V2<int_nvvm_sust_b_3d_v2i64_zero,
      SUST_B_3D_V2B64_ZERO_R, Int64Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i8_zero,
      SUST_B_3D_V4B8_ZERO_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i16_zero,
      SUST_B_3D_V4B16_ZERO_R, Int16Regs>;
def : SustSelPat_3D_V4<int_nvvm_sust_b_3d_v4i32_zero,
      SUST_B_3D_V4B32_ZERO_R, Int32Regs>;


// int_nvvm_sust_p_*_trap patterns.  Unlike the sust_b groups above, these
// groups have no 64-bit element forms.
def : SustSelPat_1D_S<int_nvvm_sust_p_1d_i8_trap,
      SUST_P_1D_B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_p_1d_i16_trap,
      SUST_P_1D_B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_S<int_nvvm_sust_p_1d_i32_trap,
      SUST_P_1D_B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_p_1d_v2i8_trap,
      SUST_P_1D_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_p_1d_v2i16_trap,
      SUST_P_1D_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V2<int_nvvm_sust_p_1d_v2i32_trap,
      SUST_P_1D_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_p_1d_v4i8_trap,
      SUST_P_1D_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_p_1d_v4i16_trap,
      SUST_P_1D_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_V4<int_nvvm_sust_p_1d_v4i32_trap,
      SUST_P_1D_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_p_1d_array_i8_trap,
      SUST_P_1D_ARRAY_B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_p_1d_array_i16_trap,
      SUST_P_1D_ARRAY_B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_S<int_nvvm_sust_p_1d_array_i32_trap,
      SUST_P_1D_ARRAY_B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i8_trap,
      SUST_P_1D_ARRAY_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i16_trap,
      SUST_P_1D_ARRAY_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i32_trap,
      SUST_P_1D_ARRAY_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i8_trap,
      SUST_P_1D_ARRAY_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i16_trap,
      SUST_P_1D_ARRAY_V4B16_TRAP_R, Int16Regs>;
def : SustSelPat_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i32_trap,
      SUST_P_1D_ARRAY_V4B32_TRAP_R, Int32Regs>;

def : SustSelPat_2D_S<int_nvvm_sust_p_2d_i8_trap,
      SUST_P_2D_B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_p_2d_i16_trap,
      SUST_P_2D_B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_S<int_nvvm_sust_p_2d_i32_trap,
      SUST_P_2D_B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_p_2d_v2i8_trap,
      SUST_P_2D_V2B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_p_2d_v2i16_trap,
      SUST_P_2D_V2B16_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V2<int_nvvm_sust_p_2d_v2i32_trap,
      SUST_P_2D_V2B32_TRAP_R, Int32Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_p_2d_v4i8_trap,
      SUST_P_2D_V4B8_TRAP_R, Int16Regs>;
def : SustSelPat_2D_V4<int_nvvm_sust_p_2d_v4i16_trap,
      SUST_P_2D_V4B16_TRAP_R, Int16Regs>;

// Kept in its original expanded form: the operand list of this record
// continues on the lines that follow.
def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g,
Int32Regs:$b, Int32Regs:$a), 6064 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6065 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6066 6067 6068 6069def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 6070 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6071 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 6072 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6073 Int16Regs:$r)>; 6074 6075def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 6076 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6077 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 6078 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6079 Int16Regs:$r)>; 6080 6081def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 6082 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6083 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 6084 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6085 Int32Regs:$r)>; 6086 6087def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 6088 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6089 Int16Regs:$r, Int16Regs:$g), 6090 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 6091 Int32Regs:$x, Int32Regs:$y, 6092 Int16Regs:$r, Int16Regs:$g)>; 6093 6094def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 6095 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6096 Int16Regs:$r, Int16Regs:$g), 6097 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 6098 Int32Regs:$x, Int32Regs:$y, 6099 Int16Regs:$r, Int16Regs:$g)>; 6100 6101def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 6102 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6103 Int32Regs:$g), 6104 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6105 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6106 6107def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 6108 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6109 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6110 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 6111 Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y, 6112 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6113 6114def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 6115 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6116 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6117 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 6118 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6119 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6120 6121def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 6122 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6123 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6124 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6125 Int32Regs:$x, Int32Regs:$y, 6126 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6127 6128 6129 6130def : Pat<(int_nvvm_sust_p_3d_i8_trap 6131 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6132 Int16Regs:$r), 6133 (SUST_P_3D_B8_TRAP_R Int64Regs:$s, 6134 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6135 Int16Regs:$r)>; 6136 6137def : Pat<(int_nvvm_sust_p_3d_i16_trap 6138 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6139 Int16Regs:$r), 6140 (SUST_P_3D_B16_TRAP_R Int64Regs:$s, 6141 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6142 Int16Regs:$r)>; 6143 6144def : Pat<(int_nvvm_sust_p_3d_i32_trap 6145 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6146 Int32Regs:$r), 6147 (SUST_P_3D_B32_TRAP_R Int64Regs:$s, 6148 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6149 Int32Regs:$r)>; 6150 6151def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 6152 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6153 Int16Regs:$r, Int16Regs:$g), 6154 (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s, 6155 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6156 Int16Regs:$r, Int16Regs:$g)>; 6157 6158def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 6159 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6160 Int16Regs:$r, Int16Regs:$g), 6161 (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s, 6162 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6163 
Int16Regs:$r, Int16Regs:$g)>; 6164 6165def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 6166 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6167 Int32Regs:$r, Int32Regs:$g), 6168 (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s, 6169 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6170 Int32Regs:$r, Int32Regs:$g)>; 6171 6172def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 6173 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6174 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6175 (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s, 6176 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6177 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6178 6179def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 6180 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6181 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6182 (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s, 6183 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6184 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6185 6186def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6188 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6189 (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s, 6190 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6191 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6192 6193//----------------------------------- 6194// Read Special Registers 6195//----------------------------------- 6196 6197class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]> 6198 : NVPTXInst<(outs Int64Regs:$d), (ins), 6199 !strconcat("mov.u64 \t$d, %", regname, ";"), 6200 [(set Int64Regs:$d, (intop))]>, 6201 Requires<Preds>; 6202 6203class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]> 6204 : NVPTXInst<(outs Int32Regs:$d), (ins), 6205 !strconcat("mov.u32 \t$d, %", regname, ";"), 6206 [(set Int32Regs:$d, (intop))]>, 6207 Requires<Preds>; 6208 6209multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> { 6210 foreach suffix = ["x", 
"y", "z", "w"] in { 6211 defvar reg = regname # "." # suffix; 6212 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix); 6213 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>; 6214 } 6215} 6216 6217// TODO Add read vector-version of special registers 6218 6219defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">; 6220defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">; 6221defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">; 6222defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">; 6223 6224defm INT_PTX_SREG_CLUSTERID : 6225 PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>; 6226defm INT_PTX_SREG_NCLUSTERID : 6227 PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>; 6228defm INT_PTX_SREG_CLUSTER_CTAID : 6229 PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>; 6230defm INT_PTX_SREG_CLUSTER_NCTAID: 6231 PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>; 6232 6233def INT_PTX_SREG_CLUSTER_CTARANK : 6234 PTX_READ_SREG_R32<"cluster_ctarank", 6235 int_nvvm_read_ptx_sreg_cluster_ctarank, 6236 [hasSM<90>, hasPTX<78>]>; 6237def INT_PTX_SREG_CLUSTER_NCTARANK: 6238 PTX_READ_SREG_R32<"cluster_nctarank", 6239 int_nvvm_read_ptx_sreg_cluster_nctarank, 6240 [hasSM<90>, hasPTX<78>]>; 6241 6242 6243def INT_PTX_SREG_LANEID : 6244 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 6245def INT_PTX_SREG_WARPID : 6246 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 6247def INT_PTX_SREG_NWARPID : 6248 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 6249def INT_PTX_SREG_SMID : 6250 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 6251def INT_PTX_SREG_NSMID : 6252 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 6253def INT_PTX_SREG_GRIDID : 6254 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 6255 6256def INT_PTX_SREG_LANEMASK_EQ : 6257 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 6258def INT_PTX_SREG_LANEMASK_LE : 6259 
PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 6260def INT_PTX_SREG_LANEMASK_LT : 6261 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 6262def INT_PTX_SREG_LANEMASK_GE : 6263 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 6264def INT_PTX_SREG_LANEMASK_GT : 6265 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 6266 6267def INT_PTX_SREG_CLOCK : 6268 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 6269def INT_PTX_SREG_CLOCK64 : 6270 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 6271 6272def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 6273def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 6274def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 6275def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 6276 6277// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 6278// handle the constant. 6279def INT_PTX_SREG_WARPSIZE : 6280 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 6281 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 6282 6283// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 6284// In addition to target-independent fields provided by WMMA_REGS, it adds 6285// the fields commonly used to implement specific PTX instruction -- register 6286// types and names, constraints, parts of assembly, etc. 6287class WMMA_REGINFO<WMMA_REGS r, string op> 6288 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> { 6289 // NVPTX register types used to carry fragment data. 
6290 NVPTXRegClass regclass = !cond( 6291 !eq(ptx_elt_type, "f16") : Int32Regs, 6292 !eq(ptx_elt_type, "f32") : Float32Regs, 6293 !eq(ptx_elt_type, "f64") : Float64Regs, 6294 !eq(ptx_elt_type, "bf16") : Int32Regs, 6295 !eq(ptx_elt_type, "tf32") : Int32Regs, 6296 !eq(ptx_elt_type, "s32") : Int32Regs, 6297 !eq(ptx_elt_type, "b16") : Int32Regs, 6298 !eq(ptx_elt_type, "s8") : Int32Regs, 6299 !eq(ptx_elt_type, "u8") : Int32Regs, 6300 !eq(ptx_elt_type, "s4") : Int32Regs, 6301 !eq(ptx_elt_type, "u4") : Int32Regs, 6302 !eq(ptx_elt_type, "b1") : Int32Regs); 6303 6304 // Instruction input/output arguments for the fragment. 6305 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs)); 6306 6307 // List of register names for the fragment -- ["ra0", "ra1",...] 6308 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret; 6309 6310 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction. 6311 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}"; 6312 6313 // Predicates for particular fragment variant. Technically those are 6314 // per-instruction predicates, but currently all fragments that can be used in 6315 // a given instruction are subject to the same constraints, so an instruction 6316 // can use predicates from any of its fragments. If/when this is no 6317 // longer the case, we can concat all per-fragment predicates to enforce that 6318 // all fragments of the instruction are viable. 
6319 list<Predicate> Predicates = !cond( 6320 // fp16 -> fp16/fp32 @ m16n16k16 6321 !and(!eq(geom, "m16n16k16"), 6322 !or(!eq(ptx_elt_type, "f16"), 6323 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>], 6324 6325 !and(!eq(geom,"m8n8k4"), 6326 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>], 6327 6328 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16 6329 !and(!or(!eq(geom, "m8n32k16"), 6330 !eq(geom, "m32n8k16")), 6331 !or(!eq(ptx_elt_type, "f16"), 6332 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>], 6333 6334 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 6335 !and(!or(!eq(geom,"m16n16k16"), 6336 !eq(geom,"m8n32k16"), 6337 !eq(geom,"m32n8k16")), 6338 !or(!eq(ptx_elt_type, "u8"), 6339 !eq(ptx_elt_type, "s8"), 6340 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>], 6341 6342 !and(!or(!eq(geom,"m16n16k16"), 6343 !eq(geom,"m8n32k16"), 6344 !eq(geom,"m32n8k16")), 6345 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>], 6346 6347 !and(!eq(geom,"m16n16k8"), 6348 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>], 6349 6350 !and(!eq(geom,"m16n16k8"), 6351 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>], 6352 6353 // b1 -> s32 @ m8n8k128(b1) 6354 !and(!ne(op,"mma"), 6355 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>], 6356 6357 // u4/s4 -> s32 @ m8n8k32 (u4/s4) 6358 !and(!ne(op,"mma"), 6359 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>], 6360 6361 !or(!eq(geom,"m16n8k8"), 6362 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>], 6363 6364 !and(!ne(ptx_elt_type,"f64"), 6365 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>], 6366 6367 // mma m8n8k32 requires higher PTX version 6368 !and(!eq(op,"mma"), 6369 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>], 6370 6371 !and(!eq(ptx_elt_type,"f64"), 6372 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>], 6373 6374 !and(!eq(op,"mma"), 6375 !or(!eq(geom, "m16n8k16"), 6376 !eq(geom, "m16n8k4"), 6377 !eq(geom, "m16n8k32"), 6378 !eq(geom, "m16n8k64"), 6379 !eq(geom, "m8n8k128"), 6380 !eq(geom, "m16n8k128"), 6381 !eq(geom, 
"m16n8k256"))) : [hasSM<80>, hasPTX<70>], 6382 6383 !and(!eq(op,"ldmatrix"), 6384 !eq(ptx_elt_type,"b16"), 6385 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]); 6386 6387 // template DAGs for instruction inputs/output. 6388 dag Outs = !dag(outs, ptx_regs, reg_names); 6389 dag Ins = !dag(ins, ptx_regs, reg_names); 6390} 6391 6392// Convert dag of arguments into a dag to match given intrinsic. 6393class BuildPatternI<Intrinsic Intr, dag Ins> { 6394 // Build a dag pattern that matches the intrinsic call. 6395 dag ret = !foreach(tmp, Ins, 6396 !subst(imem, ADDRvar, 6397 !subst(MEMri64, ADDRri64, 6398 !subst(MEMri, ADDRri, 6399 !subst(ins, Intr, tmp))))); 6400} 6401 6402// Same as above, but uses PatFrag instead of an Intrinsic. 6403class BuildPatternPF<PatFrag Intr, dag Ins> { 6404 // Build a dag pattern that matches the intrinsic call. 6405 dag ret = !foreach(tmp, Ins, 6406 !subst(imem, ADDRvar, 6407 !subst(MEMri64, ADDRri64, 6408 !subst(MEMri, ADDRri, 6409 !subst(ins, Intr, tmp))))); 6410} 6411 6412// Common WMMA-related fields used for building patterns for all MMA instructions. 6413class WMMA_INSTR<string _Intr, list<dag> _Args> 6414 : NVPTXInst<(outs), (ins), "?", []> { 6415 Intrinsic Intr = !cast<Intrinsic>(_Intr); 6416 // Concatenate all arguments into a single dag. 6417 dag Args = !foldl((ins), _Args, a, b, !con(a,b)); 6418 // Pre-build the pattern to match (intrinsic arg0, arg1, ...). 6419 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret; 6420} 6421 6422// 6423// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 6424// 6425 6426class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride, 6427 DAGOperand SrcOp> 6428 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record, 6429 [!con((ins SrcOp:$src), 6430 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 6431 Requires<Frag.Predicates> { 6432 // Load/store intrinsics are overloaded on pointer's address space. 
6433 // To match the right intrinsic, we need to build AS-constrained PatFrag. 6434 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 6435 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); 6436 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src)); 6437 // Build PatFrag that only matches particular address space. 6438 PatFrag IntrFrag = PatFrag<PFOperands, 6439 PFOperandsIntr, 6440 !cond(!eq(Space, ".shared"): AS_match.shared, 6441 !eq(Space, ".global"): AS_match.global, 6442 true: AS_match.generic)>; 6443 // Build AS-constrained pattern. 6444 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 6445 6446 let OutOperandList = Frag.Outs; 6447 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 6448 let AsmString = "wmma.load." 6449 # Frag.frag 6450 # ".sync" 6451 # "${ptx:aligned}" 6452 # "." # Layout 6453 # "." # Frag.geom 6454 # Space 6455 # "." # Frag.ptx_elt_type # " \t" 6456 # Frag.regstring 6457 # ", [$src]" 6458 # !if(WithStride, ", $ldm", "") 6459 # ";"; 6460} 6461 6462// 6463// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 6464// 6465class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, 6466 bit WithStride, DAGOperand DstOp> 6467 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record, 6468 [!con((ins DstOp:$dst), 6469 Frag.Ins, 6470 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 6471 Requires<Frag.Predicates> { 6472 6473 // Load/store intrinsics are overloaded on pointer's address space. 6474 // To match the right intrinsic, we need to build AS-constrained PatFrag. 6475 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 6476 dag PFOperands = !con((ops node:$dst), 6477 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names), 6478 !if(WithStride, (ops node:$ldm), (ops))); 6479 // Build PatFrag that only matches particular address space. 
6480 PatFrag IntrFrag = PatFrag<PFOperands, 6481 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), 6482 !cond(!eq(Space, ".shared"): AS_match.shared, 6483 !eq(Space, ".global"): AS_match.global, 6484 true: AS_match.generic)>; 6485 // Build AS-constrained pattern. 6486 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 6487 6488 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 6489 let OutOperandList = (outs); 6490 let AsmString = "wmma.store.d.sync" 6491 # "${ptx:aligned}" 6492 # "." # Layout 6493 # "." # Frag.geom 6494 # Space 6495 # "." # Frag.ptx_elt_type 6496 # " \t[$dst]," 6497 # Frag.regstring 6498 # !if(WithStride, ", $ldm", "") 6499 # ";"; 6500} 6501 6502// Create all load/store variants 6503defset list<WMMA_INSTR> MMA_LDSTs = { 6504 foreach layout = ["row", "col"] in { 6505 foreach stride = [false, true] in { 6506 foreach space = [".global", ".shared", ""] in { 6507 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { 6508 foreach frag = NVVM_MMA_OPS.all_ld_ops in 6509 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 6510 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>; 6511 foreach frag = NVVM_MMA_OPS.all_st_ops in 6512 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 6513 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>; 6514 } // addr 6515 } // space 6516 } // stride 6517 } // layout 6518} // defset 6519 6520// B1 instruction variants need extra constraints. 
// `ret` is the predicate list of fragment A, extended with
// [hasSM<80>, hasPTX<71>] when the b1 operation is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
      FragA.Predicates,
      !if(!eq(b1op, ".and.popc"), [hasSM<80>, hasPTX<71>], []));
}

// WMMA.MMA
//
// Inputs are the registers of fragments A, B and C followed by MmaCode:$ptx;
// outputs are the registers of fragment D.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));

  // When fragment A is f16 only the D and C element types are spelled out;
  // every other variant lists D, A, B and C.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragC.ptx_elt_type),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragA.ptx_elt_type,
                                   ".", FragB.ptx_elt_type,
                                   ".", FragC.ptx_elt_type));

  // Mnemonic and modifiers, then the D, A, B, C register lists, one per line.
  let AsmString = !strconcat(
      "wmma.mma", b1op, ".sync", "${ptx:aligned}",
      ".", ALayout,
      ".", BLayout,
      ".", FragA.geom,
      !if(!eq(rnd, ""), "", "." # rnd),
      TypeList,
      !if(Satfinite, ".satfinite", ""),
      "\n\t\t",
      !interleave([FragD.regstring, FragA.regstring,
                   FragB.regstring, FragC.regstring], ",\n\t\t"),
      ";");
}

// Instantiate a WMMA_MMA record for every supported combination of layouts,
// satfinite flag, rounding mode, op and b1 operation.
defset list<WMMA_INSTR> WMMAs  = {
  foreach lay_a = ["row", "col"] in
    foreach lay_b = ["row", "col"] in
      foreach sat = [0, 1] in
        foreach round = ["", "rn", "rz", "rm", "rp"] in
          foreach frags = NVVM_MMA_OPS.all_wmma_ops in
            foreach b1 = NVVM_MMA_B1OPS<frags>.ret in
              if NVVM_WMMA_SUPPORTED<frags, lay_a, lay_b, sat, round>.ret then
                def : WMMA_MMA<WMMA_REGINFO<frags[0], "wmma.mma">,
                               WMMA_REGINFO<frags[1], "wmma.mma">,
                               WMMA_REGINFO<frags[2], "wmma.mma">,
                               WMMA_REGINFO<frags[3], "wmma.mma">,
                               lay_a, lay_b, sat, round, b1>;
} // defset

// MMA
//
// Same operand structure as WMMA_MMA: A, B, C registers in (followed by
// MmaCode:$ptx), D registers out.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));

  // Element types are always listed in D, A, B, C order.
  string TypeList = !strconcat(".", FragD.ptx_elt_type,
                               ".", FragA.ptx_elt_type,
                               ".", FragB.ptx_elt_type,
                               ".", FragC.ptx_elt_type);

  // Note that, unlike wmma.mma, b1op is emitted after the type list.
  let AsmString = !strconcat(
      "mma.sync.aligned.", FragA.geom,
      ".", ALayout,
      ".", BLayout,
      !if(Satfinite, ".satfinite", ""),
      TypeList,
      b1op,
      "\n\t\t",
      !interleave([FragD.regstring, FragA.regstring,
                   FragB.regstring, FragC.regstring], ",\n\t\t"),
      ";");
}

// Instantiate an MMA record for every supported combination of layouts,
// satfinite flag, op and b1 operation.
defset list<WMMA_INSTR> MMAs  = {
  foreach lay_a = ["row", "col"] in
    foreach lay_b = ["row", "col"] in
      foreach sat = [0, 1] in
        foreach frags = NVVM_MMA_OPS.all_mma_ops in
          foreach b1 = NVVM_MMA_B1OPS<frags>.ret in
            if NVVM_MMA_SUPPORTED<frags, lay_a, lay_b, sat>.ret then
              def : MMA<WMMA_REGINFO<frags[0], "mma">,
                        WMMA_REGINFO<frags[1], "mma">,
                        WMMA_REGINFO<frags[2], "mma">,
                        WMMA_REGINFO<frags[3], "mma">,
                        lay_a, lay_b, sat, b1>;
} // defset

//
// ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<elt_type>
// e.g. ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag restricted to one address space: shared when Space is ".shared",
  // generic for any other value of Space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"),
                                 AS_match.shared,
                                 AS_match.generic)>;
  // Replace the default pattern with the address-space-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat(
      "ldmatrix.sync.aligned.", Frag.geom,
      ".", Frag.frag,
      !if(Transposed, ".trans", ""),
      Space,
      ".", Frag.ptx_elt_type,
      " ", Frag.regstring, ", [$src];");
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach trans = [false, true] in
    foreach as = [".shared", ""] in
      foreach src_op = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in
        foreach ld_frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<ld_frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<ld_frag, "ldmatrix">, trans, as,
                           src_op>;
} // defset

// Constructing non-flat DAGs is still a pain. A dag node cannot be !subst'ed
// with a dag, so ptx.version has to be appended *after* the !foreach has
// replaced 'ins' with the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(opnd, wi.Args, !subst(ins, wi, opnd)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma_inst = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma_inst>;

// mapa: four variants per intrinsic -- a 32-bit or 64-bit $a/$d pair, each
// with $b taken either from a 32-bit register or from an immediate ("i"
// suffix). Every variant is guarded by [hasSM<90>, hasPTX<78>].
multiclass MAPA<string suffix, Intrinsic Intr> {
  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
              !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
              [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
              !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
              [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
              !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
              [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
              !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
              [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
}

defm mapa  : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;


// getctarank: the result is always a 32-bit register; the _32/_64 variants
// differ only in the width of the $a operand (and the .u32/.u64 suffix).
// Every variant is guarded by [hasSM<90>, hasPTX<78>].
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
              !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
              [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
              [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
}

defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;

// Reads the %is_explicit_cluster special register into a predicate register.
def is_explicit_cluster:  NVPTXInst<(outs Int1Regs:$d), (ins),
              "mov.pred\t$d, %is_explicit_cluster;",
              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;