//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Pattern leaves matching a floating-point immediate whose value compares
// equal to 0.0 / 1.0 after conversion to float (immFloat*) or double
// (immDouble*). They are used by the sat() patterns in the Math Functions
// section below.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Code fragments that test the address space of the node N through
// ChkMemSDNodeAddressSpace, one fragment per address space.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// The list is built recursively: the names for n-1 followed by
// prefix # (n-1); n == 0 yields the empty list.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}

// Values to iterate over for the threadmask_imm parameter of SHFL_INSTR:
// [0, 1] when sync is set, otherwise only [0] (the non-sync form has no
// threadmask operand, so only one variant is generated).
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
// Everything up to the matching "} // isConvergent = true" is marked
// convergent.
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
                  [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
                  [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// The reducing barriers below take an i32 $pred. The emitted PTX block first
// turns it into a predicate register with setp.ne.u32 (non-zero -> true) and
// then performs the bar.red on barrier 0.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
// For the and/or reductions the predicate result %p2 is converted back to an
// i32 0/1 value with selp.u32.
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// In the definitions below an _I suffix means the operand is an immediate and
// an _R suffix means it is held in a register; _CNT_xy encodes the forms of
// the $id (x) and $cnt (y) operands in that order.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
// Operand-less "barrier.cluster.<variant>;" instruction selected from Intr.
// Preds defaults to [hasPTX<78>, hasSM<90>] and can be overridden per def.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
        NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
        Requires<Preds>;

def barrier_cluster_arrive:
        INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed:
        INT_BARRIER_CLUSTER<"arrive.relaxed",
        int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait:
        INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;

// One shfl[.sync].<mode>.b32 instruction variant.
//   sync           - use the .sync form, which adds a $threadmask operand.
//   mode           - shuffle mode string, spliced into the intrinsic name and
//                    the assembly string.
//   reg            - "i32" or "f32"; selects Int32Regs or Float32Regs for
//                    $src/$dst.
//   return_pred    - also produce an Int1Regs $pred result (intrinsic name
//                    gets a trailing "p", assembly prints "$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - whether the respective operand is an immediate (i32imm)
//                    or a register (Int32Regs).
// The intrinsic is looked up by name, and the operand lists, assembly string
// and selection pattern are all assembled from these parameters.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Operand order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // The pattern is derived from the operand lists: `outs` becomes `set`,
  // `ins` becomes the intrinsic, and i32imm operands are matched as `imm`.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of SHFL_INSTR parameters as an anonymous def.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// The `i` variant takes $mask as an immediate, the `r` variant in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode #" \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.<ptxtype>
// Variant suffix: the first letter gives the form of $value and the second
// the form of $mask (i = immediate, r = register). Note that the operand
// list itself is ordered (mask, value).
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync.<ptxtype> with an additional Int1Regs $pred result, printed
// as "$dest|$pred". Variant suffixes follow the same convention as
// MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers; both $src and $mask are
// register operands.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
// Operand-less fence instruction: StrOp is the complete assembly string and
// IntOP the intrinsic it is selected from.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

def INT_FENCE_SC_CLUSTER:
       MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
       Requires<[hasPTX<78>, hasSM<90>]>;

//-----------------------------------
// Async Copy Functions
//-----------------------------------

// cp.async.mbarrier.arrive[.noinc][.shared].b64 [$addr];
// NoInc and AddrSpace are spliced into the mnemonic verbatim (including the
// leading '.'). _32 / _64 select the register class used for $addr.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

// cp.async.<cc>.shared.global [$dst], [$src], <cpsize>[, $src_size];
//   cc      - spliced into the mnemonic after "cp.async.".
//   cpsize  - copy size, printed as a literal in the assembly string.
//   Intrin  - intrinsic for the two-operand form.
//   IntrinS - intrinsic for the form with a trailing src_size operand.
// _32 / _64 select the register class of the address operands; the `s`
// variants take $src_size in a register and the `si` variants as an
// immediate.
multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  // Variant with src_size parameter
  def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
             !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
             [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
                                      int_nvvm_cp_async_ca_shared_global_4_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
                                      int_nvvm_cp_async_ca_shared_global_8_s>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
                                       int_nvvm_cp_async_ca_shared_global_16_s>;

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
                                       int_nvvm_cp_async_cg_shared_global_16_s>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

// $n is matched as a target immediate (timm), i.e. there is no register form.
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX<70>, hasSM<80>]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------
// Every multiclass in this section produces a _32 and a _64 def that differ
// only in the register class used for $addr. AddrSpace is spliced into the
// mnemonic verbatim ("" or ".shared" in the instantiations below).

// mbarrier.init<AddrSpace>.b64 [$addr], $count;
multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

// mbarrier.inval<AddrSpace>.b64 [$addr];
multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

// mbarrier.arrive<AddrSpace>.b64 $state, [$addr];
// The result $state is a 64-bit register.
multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

// mbarrier.arrive.noComplete<AddrSpace>.b64 $state, [$addr], $count;
multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

// mbarrier.arrive_drop<AddrSpace>.b64 $state, [$addr];
multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

// mbarrier.arrive_drop.noComplete<AddrSpace>.b64 $state, [$addr], $count;
multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

// mbarrier.test_wait<AddrSpace>.b64 $res, [$addr], $state;
// The result $res is a predicate (Int1Regs).
multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

// mbarrier.pending_count.b64 $res, $state;
// Takes only the 64-bit $state value (no address operand) and yields an i32.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
// All four operand orders of the nested fmin/fmax are listed explicitly.

def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
// F_MATH_1: one-input instruction. OpcStr is the complete assembly string,
// the pattern selects IntOP applied to $src0, and Preds (empty by default) is
// forwarded to Requires.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  NVPTXRegClass src_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
            OpcStr,
        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
        Requires<Preds>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// F_MATH_2: two-input counterpart of F_MATH_1 ($src0, $src1).
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP,
  list<Predicate> Preds = []>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
            OpcStr,
        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
        Requires<Preds>;

// F_MATH_3: three-input counterpart of F_MATH_1 ($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  NVPTXRegClass s2_regclass, Intrinsic IntOP, list<Predicate> Preds = []>
            : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
            OpcStr,
        [(set t_regclass:$dst,
          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>,
        Requires<Preds>;

//
// MISC
//

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//
// f32 min/max. The plain and .ftz forms carry no predicates; the .NaN forms
// require [hasPTX<70>, hasSM<80>] and the .xorsign.abs forms
// [hasPTX<72>, hasSM<86>].

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
  [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
  [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F :
  F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
  [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
  [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F :
  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
    [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Min Max f16, f16x2, bf16, bf16x2
//

// One entry of the variant table used by MIN_MAX: the def-name suffix
// (Variant), the intrinsic to select from, the register class used for the
// result and both inputs, and the required predicates (default
// [hasPTX<70>, hasSM<80>]).
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
  list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_2 def per table entry. IntName must be "min" to select the
// fmin intrinsics; any other value selects the fmax ones. The assembly
// mnemonic is IntName followed by the Variant with every "_" replaced by ".",
// e.g. Variant "_ftz_NaN_f16" with IntName "min" gives "min.ftz.NaN.f16".
// The scalar f16/bf16 variants use Int16Regs and the x2 variants Int32Regs.
multiclass MIN_MAX<string IntName> {
  foreach P = [
    MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
      int_nvvm_fmax_f16), Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
      int_nvvm_fmax_ftz_f16), Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
      int_nvvm_fmax_nan_f16), Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
      int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
      int_nvvm_fmax_f16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
      int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
      int_nvvm_fmax_nan_bf16), Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
      int_nvvm_fmax_bf16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
      int_nvvm_fmin_nan_xorsign_abs_bf16x2,
      int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
        def P.Variant : F_MATH_2<!strconcat(
          IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
          P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}

defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX". Renaming it would change
// the names of every record this defm generates (INT_NVVM_FMAN_*), so confirm
// that nothing outside this file refers to them before fixing.
defm INT_NVVM_FMAN : MIN_MAX<"max">;

//
// Multiplication
//

// High-half integer multiplies (signed/unsigned, 32- and 64-bit).
def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// Floating-point multiplies with an explicit rounding-mode suffix
// (rn/rz/rm/rp); the f32 forms also come in .ftz flavours.
def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>; 818def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;", 819 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; 820def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;", 821 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; 822def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;", 823 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; 824def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;", 825 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; 826def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;", 827 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; 828def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;", 829 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; 830 831def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;", 832 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; 833def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;", 834 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; 835def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", 836 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; 837def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", 838 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; 839 840// 841// Sad 842// 843 844def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", 845 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; 846def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", 847 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; 848 849// 850// Floor Ceil 851// 852 853def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), 854 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 855def : Pat<(int_nvvm_floor_f Float32Regs:$a), 856 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; 857def : 
Pat<(int_nvvm_floor_d Float64Regs:$a), 858 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; 859 860def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), 861 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 862def : Pat<(int_nvvm_ceil_f Float32Regs:$a), 863 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; 864def : Pat<(int_nvvm_ceil_d Float64Regs:$a), 865 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; 866 867// 868// Abs 869// 870 871def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, 872 Float32Regs, int_nvvm_fabs_ftz_f>; 873def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, 874 Float32Regs, int_nvvm_fabs_f>; 875 876def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, 877 Float64Regs, int_nvvm_fabs_d>; 878 879// 880// Abs, Neg bf16, bf16x2 881// 882 883def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs, 884 Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>; 885def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs, 886 Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>; 887def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs, 888 Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>; 889def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, 890 Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>; 891 892// 893// Round 894// 895 896def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), 897 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 898def : Pat<(int_nvvm_round_f Float32Regs:$a), 899 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; 900def : Pat<(int_nvvm_round_d Float64Regs:$a), 901 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; 902 903// 904// Trunc 905// 906 907def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), 908 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 909def : Pat<(int_nvvm_trunc_f Float32Regs:$a), 910 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; 911def : Pat<(int_nvvm_trunc_d Float64Regs:$a), 912 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; 913 914// 915// Saturate 916// 917 918def : 
Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), 919 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; 920def : Pat<(int_nvvm_saturate_f Float32Regs:$a), 921 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 922def : Pat<(int_nvvm_saturate_d Float64Regs:$a), 923 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 924 925// 926// Exp2 Log2 927// 928 929def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", 930 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; 931def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", 932 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; 933def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", 934 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; 935def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", 936 Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; 937def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", 938 Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; 939 940def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", 941 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; 942def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", 943 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; 944def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", 945 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; 946 947// 948// Sin Cos 949// 950 951def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", 952 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; 953def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", 954 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; 955 956def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", 957 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; 958def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", 959 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; 960 961// 
// Fma
//

// One row of the FMA variant table consumed by FMA_INST: the suffix used for
// both the record name and (with '_' replaced by '.') the mnemonic, the
// intrinsic to select, the single register class used for the result and all
// three sources, and the predicates gating the variant (none by default).
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_3 record per table row. The record is named after the
// row's Variant string, and the asm string is "fma" followed by that string
// with every '_' turned into '.'.
multiclass FMA_INST {
  foreach Row = [
    // f64
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (held in Int16Regs)
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // bf16 (held in Int16Regs)
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
              [hasPTX<70>, hasSM<80>]>,

    // f16x2 / bf16x2 (held in Int32Regs)
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, Int32Regs,
              [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
              [hasPTX<70>, hasSM<80>]>
  ] in {
    def Row.Variant
        : F_MATH_3<"fma" # !subst("_", ".", Row.Variant) #
                       " \t$dst, $src0, $src1, $src2;",
                   Row.RegClass, Row.RegClass, Row.RegClass, Row.RegClass,
                   Row.Intr, Row.Predicates>;
  }
}

defm INT_NVVM_FMA : FMA_INST;

//
// Rcp
//

// f32 reciprocal: one record per rounding mode (rn, rz, rm, rp), each with
// and without .ftz.
def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal: one record per rounding mode.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate reciprocal (.ftz forms only).
def INT_NVVM_RCP_APPROX_FTZ_F
    : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32 square root with an explicit rounding mode, with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root, one record per rounding mode.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The same source pattern is listed four times; the Requires<> lists (from
// most to least constrained) decide whether the rn or approx instruction,
// and its .ftz or non-.ftz form, is selected. The last entry carries no
// predicate.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add: one record per rounding mode, each with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add: one record per rounding mode.
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//
// Each conversion intrinsic is rewritten to the matching CVT_* instruction;
// the trailing Cvt* operand carries the rounding (and .ftz / .relu) modifier
// named by the intrinsic suffix.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// (f32, f32) -> bf16x2
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// (f32, f32) -> f16x2
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;

// f32 -> tf32 is a standalone instruction; the result lives in Int32Regs.
def CVT_tf32_f32
    : NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
                "cvt.rna.tf32.f32 \t$dest, $a;",
                [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;

// Build an f64 register from two 32-bit registers with a single mov.b64.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Split an f64 register with mov.b64 into {first, second} 32-bit parts.
// D2I_LO keeps the first element in $dst, D2I_HI keeps the second; the other
// element goes to a scratch .b32 register declared inside the braces.
def INT_NVVM_D2I_LO
    : F_MATH_1<"{{\n\t" #
               ".reg .b32 %temp; \n\t" #
               "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
               "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
    : F_MATH_1<"{{\n\t" #
               ".reg .b32 %temp; \n\t" #
               "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
               "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (CVT_f16_f32 Float32Regs:$a, CvtRN)>;

//
// Bitcast
//
// Same-width moves between the integer and floating-point register classes.

def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Common shape of every fns.b32 record: an i32 result plus $mask, $base and
// $offset operands supplied by `ins`, with `Operands` as the dag to match.
// All variants require PTX 6.0 and sm_30.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands )]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

// One record per register/immediate combination of (mask, base, offset);
// the three-letter suffix spells the operand kinds in that order
// (r = register, i = immediate).
def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers whose predicate code is the matching AS_match entry
// (global, shared or generic).
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic for one pointer type (ptrT / ptrclass).
// Emits `reg` (value operand in a register) and `imm` (value operand as an
// immediate). Both print "atom" + SpaceStr + OpcStr + TypeStr followed by
// the operands, and both are gated by Pred.
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b;",
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
        Requires<Pred>;
  def imm
      : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b;",
                  [(set (regT regclass:$dst),
                        (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
        Requires<Pred>;
}

// Instantiates F_ATOMIC_2_imp twice: p32 with i32 pointers in Int32Regs and
// p64 with i64 pointers in Int64Regs.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second
// one
//
// Two-operand atomic whose data operand is negated first. The emitted PTX is a
// scoped block that declares a signed temporary of width TypeStr, writes
// neg($b) into it and then issues the atomic on the temporary using the
// unsigned type of the same width. TypeStr is therefore the bare bit width
// ("32" / "64"); the ".s" / ".u" prefixes are added here. Only a register form
// is defined.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b),
                "{{ \n\t"
                  # ".reg \t.s" # TypeStr # " temp; \n\t"
                  # "neg.s" # TypeStr # " \ttemp, $b; \n\t"
                  # "atom" # SpaceStr # OpcStr # ".u" # TypeStr
                  # " \t$dst, [$addr], temp; \n\t"
                  # "}}",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
      Requires<Pred>;
}

// p32 / p64 address-width instantiations of F_ATOMIC_2_NEG_imp.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass,
                         SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
  defm p64
    : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass,
                         SpaceStr, TypeStr, OpcStr, IntOp, Pred>;
}

// has 3 operands
//
// Three-operand atomic ([$addr], $b, $c) for one pointer width. Four records
// cover the register/immediate combinations of $b and $c:
//   reg  - $b register,  $c register
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b immediate, $c immediate
// Immediate operands use IMMType in the operand list and the `imm` node in the
// selection pattern.
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr
                  # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b), (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm1
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                "atom" # SpaceStr # OpcStr # TypeStr
                  # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             imm:$b, (regT regclass:$c)))]>,
      Requires<Pred>;

  def imm2
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr
                  # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr),
                             (regT regclass:$b), imm:$c))]>,
      Requires<Pred>;

  def imm3
    : NVPTXInst<(outs regclass:$dst),
                (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                "atom" # SpaceStr # OpcStr # TypeStr
                  # " \t$dst, [$addr], $b, $c;",
                [(set (regT regclass:$dst),
                      (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
      Requires<Pred>;
}

// p32 / p64 address-width instantiations of F_ATOMIC_3_imp.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass,
                      string SpaceStr, string TypeStr, string OpcStr,
                      PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32
    : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
  defm p64
    : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass,
                     SpaceStr, TypeStr, OpcStr, IntOp, IMMType, Pred>;
}

// atom_add
//
// Address-space-qualified fragments for atomic add. Suffix _g / _s / _gen
// picks the global / shared / generic address-space check. The fragments
// without a width in their name wrap atomic_load_fadd.

def atomic_load_add_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_fadd node:$a, node:$b)>;

// Naming used by every instruction group below:
//   _G_ / _S_ / _GEN_ - matches the global / shared / generic fragment and
//                       emits ".global" / ".shared" / no space qualifier.
//   _GEN_*_USE_G      - matches the generic fragment but emits ".global".
//                       NOTE(review): nothing in these records distinguishes
//                       them from the plain _GEN_ records; confirm how the
//                       selector is expected to choose between the two.
//   _32 / _64 / _F32 / _F64 - data type of the operation.

defm INT_PTX_ATOM_ADD_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".add",
               atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".add",
               atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".add",
               atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".add",
               atomic_load_add_64_gen, i64imm, imm>;

// Floating-point add: immediates are matched with fpimm.
defm INT_PTX_ATOM_ADD_G_F32
  : F_ATOMIC_2<f32, Float32Regs, ".global", ".f32", ".add",
               atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
  : F_ATOMIC_2<f32, Float32Regs, ".shared", ".f32", ".add",
               atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
  : F_ATOMIC_2<f32, Float32Regs, "", ".f32", ".add",
               atomic_load_add_gen, f32imm, fpimm>;

// The f64 forms are additionally guarded by hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64
  : F_ATOMIC_2<f64, Float64Regs, ".global", ".f64", ".add",
               atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
  : F_ATOMIC_2<f64, Float64Regs, ".shared", ".f64", ".add",
               atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
  : F_ATOMIC_2<f64, Float64Regs, "", ".f64", ".add",
               atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

def atomic_load_sub_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction goes through F_ATOMIC_2_NEG with the ".add" opcode. TypeStr is
// the bare width ("32" / "64") rather than a dotted PTX type.
defm INT_PTX_ATOM_SUB_G_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, "", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".global", "32", ".add",
                   atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
  : F_ATOMIC_2_NEG<i32, Int32Regs, ".shared", "32", ".add",
                   atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".shared", "64", ".add",
                   atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
  : F_ATOMIC_2_NEG<i64, Int64Regs, "", "64", ".add",
                   atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
  : F_ATOMIC_2_NEG<i64, Int64Regs, ".global", "64", ".add",
                   atomic_load_sub_64_gen>;

// atom_swap

def atomic_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_swap_64 node:$a, node:$b)>;

// Swap is emitted as atom.exch on the untyped .b32 / .b64 types.
defm INT_PTX_ATOM_SWAP_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".exch",
               atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".exch",
               atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".exch",
               atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".exch",
               atomic_swap_64_gen, i64imm, imm>;

// atom_max

def atomic_load_max_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umax_64 node:$a, node:$b)>;

// Signed max uses the .s32 / .s64 types, unsigned max .u32 / .u64. Every
// 64-bit record carries the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".max",
               atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".max",
               atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".max",
               atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".max",
               atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".max",
               atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".max",
               atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".max",
               atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".max",
               atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_min

def atomic_load_min_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_umin_64 node:$a, node:$b)>;

// Signed min uses the .s32 / .s64 types, unsigned min .u32 / .u64. Every
// 64-bit record carries the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".s32", ".min",
               atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".s32", ".min",
               atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".s64", ".min",
               atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".s64", ".min",
               atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".min",
               atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".min",
               atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".u64", ".min",
               atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".u64", ".min",
               atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_inc  atom_dec
//
// Unlike the groups above, these fragments wrap the NVVM intrinsics
// int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32. Only 32-bit
// records are defined.

def atomic_load_inc_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc",
               atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".inc",
               atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec",
               atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec",
               atomic_load_dec_32_gen, i32imm, imm>;

// atom_and

def atomic_load_and_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_and_64 node:$a, node:$b)>;

// Bitwise ops use the untyped .b32 / .b64 types; 64-bit records carry the
// hasSM<32> predicate.
defm INT_PTX_ATOM_AND_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and",
               atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and",
               atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and",
               atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and",
               atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_or

def atomic_load_or_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_or_64 node:$a, node:$b)>;

// Definition order (G, GEN, GEN_USE_G, S) differs from the other groups and is
// kept as found.
defm INT_PTX_ATOM_OR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or",
               atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or",
               atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or",
               atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or",
               atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;

// atom_xor

def atomic_load_xor_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                      (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                       (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32
  : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor",
               atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32
  : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G
  : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor",
               atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64
  : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor",
               atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64
  : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G
  : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor",
               atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;

// atom_cas
//
// Compare-and-swap fragments take three operands and are instantiated through
// the three-operand F_ATOMIC_3 multiclass.

def atomic_cmp_swap_32_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g
  : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s
  : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
                      (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen
  : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
                       (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
  : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
               atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
  : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
  : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
               atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
  : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
               atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
  : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
  : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
               atomic_cmp_swap_64_gen, i64imm>;

// Support for scoped atomic
// operations. Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.

// Shared record shape for the two- and three-operand scoped atomics: one
// result register named $result, the asm string AsmStr, a pattern that sets
// $result from `Operands`, and the predicate list Preds.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass,
                  list<Predicate> Preds, dag ins, dag Operands>
  : NVPTXInst<(outs regclass:$result),
              ins,
              AsmStr,
              [(set (regT regclass:$result), Operands)]>,
    Requires<Preds>;

// Define instruction variants for all addressing modes.
// Four anonymous records: {32-bit, 64-bit address} x {register, immediate $b}.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // The register forms are given AddedComplexity = 1; the immediate forms
  // below are left at the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr (i32 Int32Regs:$src), (regT regclass:$b))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr (i64 Int64Regs:$src), (regT regclass:$b))>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr (i32 Int32Regs:$src), (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr (i64 Int64Regs:$src), (ImmTy Imm:$b))>;
}

// Three-operand counterpart of ATOM2P_impl. Eight anonymous records:
// {32-bit, 64-bit address} x {reg/reg, imm/reg, reg/imm, imm/imm} for $b/$c.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  // Both operands in registers: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src),
                            (regT regclass:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src),
                            (regT regclass:$b), (regT regclass:$c))>;
  }
  // Exactly one operand in a register: AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i32 Int32Regs:$src),
                            (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr (i64 Int64Regs:$src),
                            (ImmTy Imm:$b), (regT regclass:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i32 Int32Regs:$src),
                            (regT regclass:$b), (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr (i64 Int64Regs:$src),
                            (regT regclass:$b), (ImmTy Imm:$c))>;
  }
  // Both operands immediate: default complexity.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i32 Int32Regs:$src),
                          (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr (i64 Int64Regs:$src),
                          (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
//   asm:       "atom" [.<SpaceStr> unless "gen"] [.<ScopeStr> unless "gpu"]
//              .<OpStr>.<TypeStr> followed by the operand list
//   intrinsic: int_nvvm_atomic_<OpStr>_<SpaceStr>_<IntTypeStr>[_<ScopeStr>]
// NOTE(review): the asm string omits the scope when ScopeStr is "gpu", while
// the intrinsic name omits it only when ScopeStr is empty. The two tests
// disagree for "gpu" and "" -- confirm neither value is ever passed.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm : ATOM2P_impl<
      "atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
             # "." # OpStr # "." # TypeStr
             # " \t$result, [$src], $b;",
      !cast<Intrinsic>(
          "int_nvvm_atomic_" # OpStr
          # "_" # SpaceStr # "_" # IntTypeStr
          # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
      regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Same name/asm construction as ATOM2N_impl, for the three-operand form
// (operand list "$b, $c").
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm : ATOM3P_impl<
      "atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
             # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
             # "." # OpStr # "." # TypeStr
             # " \t$result, [$src], $b, $c;",
      !cast<Intrinsic>(
          "int_nvvm_atomic_" # OpStr
          # "_" # SpaceStr # "_" # IntTypeStr
          # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
      regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
// Address-space layer for two-operand scoped atomics: instantiates only the
// generic ("gen") address-space variant.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Address-space layer for three-operand scoped atomics: instantiates only the
// generic ("gen") address-space variant.
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Scope layer for two-operand atomics: emits the "cta" and "sys" variants,
// each additionally guarded by hasAtomScope.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Caller-supplied predicates plus the scope requirement.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // .gpu scope is the default and is currently covered by the existing
  // atomics that do not specify a scope explicitly.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}

// Scope layer for three-operand atomics: emits the "cta" and "sys" variants,
// each additionally guarded by hasAtomScope.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Caller-supplied predicates plus the scope requirement.
  defvar ScopedPreds = !listconcat(Preds, [hasAtomScope]);

  // No need to define ".gpu"-scoped atomics. They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy, ScopedPreds>;
}

// atom.add: s32/u32/u64 integer and f32/f64 floating-point variants.
// The f64 variant is guarded by hasAtomAddF64.
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs,
                          f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs,
                          f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}: the 64-bit variant is guarded by hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}

// atom.{min,max}: the 64-bit variants are guarded by hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}

// Instantiate the scoped atomics, one family per PTX atom operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// Scalar ldu.global: one pattern-less instruction per addressing form
// (32/64-bit register, symbol, 32/64-bit reg+imm), each guarded by hasLDU.
// TyStr supplies the type suffix and the operand text of the asm string.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar LduOut = (outs regclass:$result);

  def areg   : NVPTXInst<LduOut, (ins Int32Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<LduOut, (ins Int64Regs:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<LduOut, (ins imemAny:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<LduOut, (ins MEMri:$src), LduAsm, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<LduOut, (ins MEMri64:$src), LduAsm, []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldu: two destination registers, one instruction per
// addressing form.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar LduOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<LduOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<LduOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<LduOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<LduOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<LduOuts, (ins imemAny:$src), LduAsm, []>;
}

// Elementized vector ldu: four destination registers, one instruction per
// addressing form.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LduAsm = "ldu.global." # TyStr;
  defvar LduOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<LduOuts, (ins Int32Regs:$src), LduAsm, []>;
  def _areg64 : NVPTXInst<LduOuts, (ins Int64Regs:$src), LduAsm, []>;
  def _ari32  : NVPTXInst<LduOuts, (ins MEMri:$src), LduAsm, []>;
  def _ari64  : NVPTXInst<LduOuts, (ins MEMri64:$src), LduAsm, []>;
  def _avar   : NVPTXInst<LduOuts, (ins imemAny:$src), LduAsm, []>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc: one pattern-less instruction per addressing form
// (32/64-bit register, symbol, 32/64-bit reg+imm), each guarded by hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar LdgOut = (outs regclass:$result);

  def areg   : NVPTXInst<LdgOut, (ins Int32Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<LdgOut, (ins Int64Regs:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<LdgOut, (ins imemAny:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<LdgOut, (ins MEMri:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<LdgOut, (ins MEMri64:$src), LdgAsm, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldg: two destination registers, one pattern-less
// ld.global.nc instruction per addressing form.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar LdgOuts = (outs regclass:$dst1, regclass:$dst2);

  def _areg32 : NVPTXInst<LdgOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<LdgOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<LdgOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<LdgOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<LdgOuts, (ins imemAny:$src), LdgAsm, []>;
}

// Elementized vector ldg: four destination registers, one pattern-less
// ld.global.nc instruction per addressing form.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  defvar LdgAsm = "ld.global.nc." # TyStr;
  defvar LdgOuts = (outs regclass:$dst1, regclass:$dst2,
                         regclass:$dst3, regclass:$dst4);

  def _areg32 : NVPTXInst<LdgOuts, (ins Int32Regs:$src), LdgAsm, []>;
  def _areg64 : NVPTXInst<LdgOuts, (ins Int64Regs:$src), LdgAsm, []>;
  def _ari32  : NVPTXInst<LdgOuts, (ins MEMri:$src), LdgAsm, []>;
  def _ari64  : NVPTXInst<LdgOuts, (ins MEMri64:$src), LdgAsm, []>;
  def _avar   : NVPTXInst<LdgOuts, (ins imemAny:$src), LdgAsm, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// cvta.<space>: selects the given non-generic -> generic intrinsic.
//   _yes      32-bit source, 32-bit result
//   _yes_64   64-bit source, 64-bit result
//   _yes_6432 32-bit source widened with cvt.u64.u32 before a 64-bit cvta;
//             only when useShortPtr holds
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvt.u64.u32 \t%tmp, $src;\n\t",
                           " cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// cvta.to.<space>: selects the given generic -> non-generic intrinsic.
//   _yes      32-bit source, 32-bit result
//   _yes_64   64-bit source, 64-bit result
//   _yes_3264 64-bit cvta.to followed by a cvt.u32.u64 narrowing;
//             only when useShortPtr holds
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           " cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param: emitted as a plain register move.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics: each one is emitted as a mov of the matching width.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.  There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// Pattern-less 64-bit move of a symbol operand into a register.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each of these emits only a PTX comment line.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

// isspacep.<suffix> for 32-bit and 64-bit addresses; the result is a
// predicate register. Preds (empty by default) guards both variants.
multiclass ISSPACEP<string suffix, Intrinsic Intr,
                    list<Predicate> Preds = []> {
  defvar IsSpaceAsm = !strconcat("isspacep.", suffix, "\t$d, $a;");

  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), IsSpaceAsm,
                      [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), IsSpaceAsm,
                      [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
            Requires<Preds>;
}

defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const,
                               [hasPTX<31>]>;
defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
                                       int_nvvm_isspacep_shared_cluster,
                                       [hasPTX<78>, hasSM<90>]>;

// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
                            "mov.b32 \t$d, $r;", []>;

// int_nvvm_read_ptx_sreg_envreg<N> selects to MOV_SPECIAL of ENVREG<N>
// (N = 0..17 here).
foreach i = 0...17 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;
2526def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2527def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2528def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2529def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2530def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2531def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2532def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2533def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2534def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2535def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2536def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2537def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2538def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2539def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2540 2541 2542// rotate builtin support 2543 2544def ROTATE_B32_HW_IMM 2545 : NVPTXInst<(outs Int32Regs:$dst), 2546 (ins Int32Regs:$src, i32imm:$amt), 2547 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2548 [(set Int32Regs:$dst, 2549 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2550 Requires<[hasHWROT32]> ; 2551 2552def ROTATE_B32_HW_REG 2553 : NVPTXInst<(outs Int32Regs:$dst), 2554 (ins Int32Regs:$src, Int32Regs:$amt), 2555 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2556 [(set Int32Regs:$dst, 2557 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2558 Requires<[hasHWROT32]> ; 2559 2560def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2561 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2562 Requires<[noHWROT32]> ; 2563 2564def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2565 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2566 Requires<[noHWROT32]> ; 2567 2568let hasSideEffects = false 
in { 2569 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2570 !strconcat("{{\n\t", 2571 ".reg .b32 %dummy;\n\t", 2572 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2573 "}}"), 2574 []> ; 2575 2576 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2577 !strconcat("{{\n\t", 2578 ".reg .b32 %dummy;\n\t", 2579 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2580 "}}"), 2581 []> ; 2582} 2583 2584let hasSideEffects = false in { 2585 def PACK_TWO_INT32 2586 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2587 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2588} 2589 2590def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2591 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2592 (GET_LO_INT64 Int64Regs:$src))> ; 2593 2594// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2595// no side effects. 2596let hasSideEffects = false in { 2597 def SHF_L_WRAP_B32_IMM 2598 : NVPTXInst<(outs Int32Regs:$dst), 2599 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2600 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2601 Requires<[hasHWROT32]>; 2602 2603 def SHF_L_WRAP_B32_REG 2604 : NVPTXInst<(outs Int32Regs:$dst), 2605 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2606 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2607 Requires<[hasHWROT32]>; 2608 2609 def SHF_R_WRAP_B32_IMM 2610 : NVPTXInst<(outs Int32Regs:$dst), 2611 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2612 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2613 Requires<[hasHWROT32]>; 2614 2615 def SHF_R_WRAP_B32_REG 2616 : NVPTXInst<(outs Int32Regs:$dst), 2617 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2618 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2619 Requires<[hasHWROT32]>; 2620} 2621 2622// HW version of rotate 64 2623def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2624 (PACK_TWO_INT32 2625 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2626 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2627 (SHF_L_WRAP_B32_IMM 
(GET_LO_INT64 Int64Regs:$src), 2628 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2629 Requires<[hasHWROT32]>; 2630 2631def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2632 (PACK_TWO_INT32 2633 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2634 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2635 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2636 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2637 Requires<[hasHWROT32]>; 2638 2639 2640def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2641 (PACK_TWO_INT32 2642 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2643 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2644 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2645 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2646 Requires<[hasHWROT32]>; 2647 2648def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2649 (PACK_TWO_INT32 2650 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2651 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 2652 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2653 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2654 Requires<[hasHWROT32]>; 2655 2656// SW version of rotate 64 2657def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2658 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, 2659 Requires<[noHWROT32]>; 2660def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2661 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2662 Requires<[noHWROT32]>; 2663def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2664 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 2665 Requires<[noHWROT32]>; 2666def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2667 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2668 Requires<[noHWROT32]>; 2669 2670 2671//----------------------------------- 2672// Texture Intrinsics 2673//----------------------------------- 2674 2675// NOTE: For Fermi support, any new 
texture/surface/sampler intrinsics must be 2676// also defined in NVPTXReplaceImageHandles.cpp 2677 2678// texmode_independent 2679let IsTex = true, IsTexModeUnified = false in { 2680// Texture fetch instructions using handles 2681 2682class TEX_1D_base<string inst, NVPTXRegClass outtype, 2683 NVPTXRegClass intype, dag texsamp> 2684 : NVPTXInst<(outs outtype:$r, outtype:$g, 2685 outtype:$b, outtype:$a), 2686 !con(texsamp, (ins intype:$x)), 2687 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2688 []>; 2689 2690multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2691 def _RR : TEX_1D_base<inst, outtype, intype, 2692 (ins Int64Regs:$t, Int64Regs:$s)>; 2693 def _RI : TEX_1D_base<inst, outtype, intype, 2694 (ins Int64Regs:$t, i64imm:$s)>; 2695 def _IR : TEX_1D_base<inst, outtype, intype, 2696 (ins i64imm:$t, Int64Regs:$s)>; 2697 def _II : TEX_1D_base<inst, outtype, intype, 2698 (ins i64imm:$t, i64imm:$s)>; 2699} 2700 2701defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 2702defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2703defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 2704defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2705defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 2706defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2707 2708class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 2709 NVPTXRegClass intype, dag texsamp> 2710 : NVPTXInst<(outs outtype:$r, outtype:$g, 2711 outtype:$b, outtype:$a), 2712 !con(texsamp, (ins intype:$x, intype:$lod)), 2713 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", 2714 []>; 2715 2716multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, 2717 NVPTXRegClass intype> { 2718 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, 2719 (ins Int64Regs:$t, Int64Regs:$s)>; 2720 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, 2721 
(ins Int64Regs:$t, i64imm:$s)>; 2722 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, 2723 (ins i64imm:$t, Int64Regs:$s)>; 2724 def _II : TEX_1D_LEVEL_base<inst, outtype, intype, 2725 (ins i64imm:$t, i64imm:$s)>; 2726} 2727 2728defm TEX_1D_F32_F32_LEVEL : 2729 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2730defm TEX_1D_S32_F32_LEVEL : 2731 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2732defm TEX_1D_U32_F32_LEVEL : 2733 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2734 2735class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, 2736 NVPTXRegClass intype, dag texsamp> 2737 : NVPTXInst<(outs outtype:$r, outtype:$g, 2738 outtype:$b, outtype:$a), 2739 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), 2740 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," 2741 " \\{$gradx\\}, \\{$grady\\};", 2742 []>; 2743 2744multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, 2745 NVPTXRegClass intype> { 2746 def _RR : TEX_1D_GRAD_base<inst, outtype, intype, 2747 (ins Int64Regs:$t, Int64Regs:$s)>; 2748 def _RI : TEX_1D_GRAD_base<inst, outtype, intype, 2749 (ins Int64Regs:$t, i64imm:$s)>; 2750 def _IR : TEX_1D_GRAD_base<inst, outtype, intype, 2751 (ins i64imm:$t, Int64Regs:$s)>; 2752 def _II : TEX_1D_GRAD_base<inst, outtype, intype, 2753 (ins i64imm:$t, i64imm:$s)>; 2754} 2755 2756defm TEX_1D_F32_F32_GRAD 2757 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2758defm TEX_1D_S32_F32_GRAD 2759 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2760defm TEX_1D_U32_F32_GRAD 2761 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2762 2763class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 2764 NVPTXRegClass intype, dag texsamp> 2765 : NVPTXInst<(outs outtype:$r, outtype:$g, 2766 outtype:$b, outtype:$a), 2767 !con(texsamp, (ins Int32Regs:$l, intype:$x)), 2768 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", 2769 []>; 2770 2771multiclass 
TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, 2772 NVPTXRegClass intype> { 2773 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, 2774 (ins Int64Regs:$t, Int64Regs:$s)>; 2775 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, 2776 (ins Int64Regs:$t, i64imm:$s)>; 2777 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, 2778 (ins i64imm:$t, Int64Regs:$s)>; 2779 def _II : TEX_1D_ARRAY_base<inst, outtype, intype, 2780 (ins i64imm:$t, i64imm:$s)>; 2781} 2782 2783defm TEX_1D_ARRAY_F32_F32 2784 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2785defm TEX_1D_ARRAY_F32_S32 2786 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 2787defm TEX_1D_ARRAY_S32_S32 2788 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 2789defm TEX_1D_ARRAY_S32_F32 2790 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2791defm TEX_1D_ARRAY_U32_S32 2792 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 2793defm TEX_1D_ARRAY_U32_F32 2794 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2795 2796class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2797 NVPTXRegClass intype, dag texsamp> 2798 : NVPTXInst<(outs outtype:$r, outtype:$g, 2799 outtype:$b, outtype:$a), 2800 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), 2801 inst # " \t\\{$r, $g, $b, $a\\}," 2802 " [$t, $s, \\{$l, $x\\}], $lod;", 2803 []>; 2804 2805multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2806 NVPTXRegClass intype> { 2807 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2808 (ins Int64Regs:$t, Int64Regs:$s)>; 2809 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2810 (ins Int64Regs:$t, i64imm:$s)>; 2811 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2812 (ins i64imm:$t, Int64Regs:$s)>; 2813 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2814 (ins i64imm:$t, i64imm:$s)>; 2815} 2816 2817defm TEX_1D_ARRAY_F32_F32_LEVEL 2818 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, 
Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
  : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".grad" form of the 1D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x,
// $gradx and $grady (all `intype`). Each gradient is printed as its own
// single-element brace list. The pattern list is empty.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x,
                                    intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
                        " \\{$gradx\\}, \\{$grady\\};",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

// tex.grad.a1d instantiations (coordinate type is f32 in every variant).
defm TEX_1D_ARRAY_F32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD
  : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the plain 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then the coordinates $x and $y
// (both `intype`). The pattern list is empty.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_base<inst, outtype,
intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

// tex.2d instantiations: one defm per (result type, coordinate type) pair.
defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".level" form of the 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y and $lod (all
// `intype`). The pattern list is empty.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y\\}], $lod;",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.2d instantiations (coordinate type is f32 in every variant).
defm TEX_2D_F32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".grad" form of the 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y and two two-component
// gradients ($gradx0/$gradx1 and $grady0/$grady1), all `intype`.
// The pattern list is empty.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
!con(texsamp, (ins intype:$x, intype:$y,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

// tex.grad.2d instantiations (coordinate type is f32 in every variant).
defm TEX_2D_F32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the plain 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x and
// $y (both `intype`). The pattern list is empty.
// The asm string prints $y twice so the coordinate list has four entries
// (presumably to satisfy the PTX four-element vector form -- confirm against
// the PTX ISA).
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$l, $x, $y, $y\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

// tex.a2d instantiations: one defm per (result type, coordinate type) pair.
defm TEX_2D_ARRAY_F32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32
  :
TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".level" form of the 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x, $y
// and $lod (all `intype`). The pattern list is empty.
// $y is printed twice to fill a four-entry coordinate list.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.a2d instantiations (coordinate type is f32 in every variant).
defm TEX_2D_ARRAY_F32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".grad" form of the 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x, $y
// and two two-component gradients, all `intype`.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$grady0, intype:$grady1)),
inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$l, $x, $y, $y\\}],"
                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

// tex.grad.a2d instantiations (coordinate type is f32 in every variant).
defm TEX_2D_ARRAY_F32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the plain 3D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y and $z (all `intype`).
// The pattern list is empty.
// The asm string prints $z twice so the coordinate list has four entries
// (presumably to satisfy the PTX four-element vector form -- confirm against
// the PTX ISA).
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y, $z, $z\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

// tex.3d instantiations: one defm per (result type, coordinate type) pair.
defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs,
Int32Regs>;
defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".level" form of the 3D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y, $z and $lod (all
// `intype`). The pattern list is empty.
// $z is printed twice to fill a four-entry coordinate list.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.3d instantiations (coordinate type is f32 in every variant).
defm TEX_3D_F32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".grad" form of the 3D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y, $z and two
// three-component gradients ($gradx0..2, $grady0..2), all `intype`.
// The pattern list is empty.
// In the asm string $z, $gradx2 and $grady2 are each printed twice, so every
// brace list has four entries.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                    intype:$gradx0, intype:$gradx1,
                                    intype:$gradx2, intype:$grady0,
                                    intype:$grady1, intype:$grady2)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y, $z, $z\\}],"
                        " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                        " \\{$grady0, $grady1, $grady2, $grady2\\};",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass
outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

// tex.grad.3d instantiations (coordinate type is f32 in every variant).
defm TEX_3D_F32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the plain cube fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y and $z (all `intype`).
// The pattern list is empty.
// $z is printed twice to fill a four-entry coordinate list.
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y, $z, $z\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}

// tex.cube instantiations (coordinate type is f32 in every variant).
defm TEX_CUBE_F32_F32
  : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32
  : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32
  : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".level" form of the cube fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $x, $y, $z and $lod (all
// `intype`). The pattern list is empty.
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x,
intype:$y, intype:$z,
                                    intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.cube instantiations (coordinate type is f32 in every variant).
defm TEX_CUBE_F32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the plain cube-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x, $y
// and $z (all `intype`). The pattern list is empty.
// Here $l, $x, $y, $z already make four entries, so nothing is repeated.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$l, $x, $y, $z\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

// tex.acube instantiations (coordinate type is f32 in every variant).
defm TEX_CUBE_ARRAY_F32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs,
Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the ".level" form of the cube-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `texsamp` (provides $t and $s), then $l (always Int32Regs), $x, $y,
// $z and $lod (all `intype`). The pattern list is empty.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                    intype:$z, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.acube instantiations (coordinate type is f32 in every variant).
defm TEX_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                         Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                         Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                         Int32Regs, Float32Regs>;

// Base record for the 2D tld4 instructions.
// Results: $v0..$v3 of class `outtype` (named differently from the
// $r/$g/$b/$a results of the tex records).
// Inputs: `texsamp` (provides $t and $s), then $x and $y (both `intype`).
// The pattern list is empty.
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
                      outtype:$v2, outtype:$v3),
                 !con(texsamp, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
                 []>;

// Expands to _RR/_RI/_IR/_II: $t and $s each come from Int64Regs (R) or
// i64imm (I).
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, i64imm:$s)>;
  def _IR :
TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, i64imm:$s)>;
}

// tld4 instantiations with f32 results: one defm per component selector
// (.r/.g/.b/.a) in the mnemonic.
defm TLD4_R_2D_F32_F32
  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32
  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32
  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32
  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// tld4 instantiations with s32 results.
defm TLD4_R_2D_S32_F32
  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32
  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32
  : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32
  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// tld4 instantiations with u32 results.
defm TLD4_R_2D_U32_F32
  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32
  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32
  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32
  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Closes a brace scope whose opener lies before this part of the file.
}


// texmode_unified
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// Base record for the unified-mode plain 1D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: the caller-supplied `tex` dag (which provides the $t referenced by
// the asm string; there is no $s operand in this mode), then $x (`intype`).
// The pattern list is empty.
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm
TEX_UNIFIED_1D_F32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".level" 1D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x and $lod (both `intype`).
// The pattern list is empty.
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.level.1d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_1D_F32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".grad" 1D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x, $gradx and $grady (all `intype`).
// The pattern list is empty.
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.grad.1d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_1D_F32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode plain 1D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs) and $x (`intype`).
// The pattern list is empty.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.a1d instantiations: one defm per (result, coordinate) type pair.
defm TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".level" 1D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs), $x and $lod.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins
Int32Regs:$l, intype:$x, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

// Unified tex.level.a1d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record for the unified-mode ".grad" 1D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs), $x, $gradx and
// $grady (all `intype`). The pattern list is empty.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x,
                                intype:$gradx, intype:$grady)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

// Unified tex.grad.a1d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Base record for the unified-mode plain 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x and $y (both `intype`).
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.2d instantiations: one defm per (result, coordinate) type pair.
defm TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".level" 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x, $y and $lod (all `intype`).
// The pattern list is empty.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
                 []>;

// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.level.2d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
  :
TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".grad" 2D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x, $y and two two-component gradients
// ($gradx0/$gradx1 and $grady0/$grady1), all `intype`.
// The pattern list is empty.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y,
                                intype:$gradx0, intype:$gradx1,
                                intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;
// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.grad.2d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode plain 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs), $x and $y.
// The pattern list is empty.
// The asm string prints $y twice so the coordinate list has four entries
// (presumably to satisfy the PTX four-element vector form -- confirm against
// the PTX ISA).
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
                 []>;
// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.a2d instantiations: one defm per (result, coordinate) type pair.
defm TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
: TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".level" 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs), $x, $y and $lod.
// The pattern list is empty.
// $y is printed twice to fill a four-entry coordinate list.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                intype:$lod)),
                 inst # " \t\\{$r, $g, $b, $a\\},"
                        " [$t, \\{$l, $x, $y, $y\\}], $lod;",
                 []>;
// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

// Unified tex.level.a2d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record for the unified-mode ".grad" 2D-array fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $l (always Int32Regs), $x, $y and two
// two-component gradients, all `intype`. The pattern list is empty.
// $y is printed twice to fill a four-entry coordinate list.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                                intype:$gradx0, intype:$gradx1,
                                intype:$grady0, intype:$grady1)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
                        " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                 []>;
// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string
inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

// Unified tex.grad.a2d instantiations (coordinate type is f32 throughout).
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Base record for the unified-mode plain 3D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
// Inputs: `tex` (provides $t), then $x, $y and $z (all `intype`).
// The pattern list is empty.
// The asm string prints $z twice so the coordinate list has four entries
// (presumably to satisfy the PTX four-element vector form -- confirm against
// the PTX ISA).
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                 []>;
// Expands to _R/_I: $t comes from Int64Regs (R) or i64imm (I).
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// Unified tex.3d instantiations: one defm per (result, coordinate) type pair.
defm TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record for the unified-mode ".level" 3D fetch.
// Results: $r, $g, $b, $a of class `outtype`.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
outtype:$b, outtype:$a),
                !con(tex,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
    : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Instruction template instantiated below with the "tex.grad.3d.*"
// mnemonics. Inputs are the `tex` dag (supplies $t), $x/$y/$z and three
// components for each of the two gradients. All three vectors are printed
// with four entries, each repeating its last component ($z, $gradx2,
// $grady2). No selection pattern is attached.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex,
                     (ins intype:$x, intype:$y, intype:$z,
                          intype:$gradx0, intype:$gradx1,
                          intype:$gradx2, intype:$grady0,
                          intype:$grady1, intype:$grady2)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
                       " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                       " \\{$grady0, $grady1, $grady2, $grady2\\};",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                    (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype,
                                    (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
    : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

class
TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_F32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32
    : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Instruction template instantiated below with the "tex.level.cube.*"
// mnemonics. Inputs are the `tex` dag (supplies $t), $x/$y/$z and $lod;
// the printed coordinate vector has four entries with $z repeated.
// No selection pattern is attached.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex,
                     (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
                             Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
                             Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
    : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
                             Int32Regs, Float32Regs>;

class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass
outtype,
                                  NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                       (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
                                       (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
    : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Instruction template instantiated below with the "tex.level.acube.*"
// mnemonics. Inputs are the `tex` dag (supplies $t), an Int32Regs $l,
// $x/$y/$z and $lod. Here the four coordinate slots are all distinct
// ($l, $x, $y, $z), so nothing is repeated. No selection pattern is attached.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex,
                     (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                          intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$l, $x, $y, $z\\}], $lod;",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                             (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                                   Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                                   Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
    :
TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                             Int32Regs, Float32Regs>;

// Instruction template instantiated below with the "tld4.{r,g,b,a}.2d.*"
// mnemonics. Four `outtype` results ($v0..$v3); inputs are the `tex` dag
// (supplies $t) followed by $x and $y. No selection pattern is attached.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
                !con(tex, (ins intype:$x, intype:$y)),
                inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
                []>;

// _R: $t is an Int64Regs operand; _I: $t is an i64imm operand.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TLD4_UNIFIED_R_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

defm TLD4_UNIFIED_R_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

defm TLD4_UNIFIED_R_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32
    : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

}



//=== Surface load instructions

// Every record defined inside this scope gets IsSuld = true.
let IsSuld = true in {

// Single-result surface load template: one `outtype` result $r; inputs are
// the `surf` dag (supplies $s) followed by an Int32Regs $x.
// No selection pattern is attached.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$x\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: the b8 and the b16 variants both use Int16Regs for the result.
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP  : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// As SULD_1D_base, with an extra leading Int32Regs $l coordinate.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_I8_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp",
Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
    : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
    : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// Single-result surface load template with two Int32Regs coordinates
// ($x, $y); the `surf` dag supplies $s. No selection pattern is attached.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: the b8 and the b16 variants both use Int16Regs for the result.
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero",
Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// Single-result surface load template with coordinates $l, $x, $y (all
// Int32Regs); the `surf` dag supplies $s. The coordinate vector is printed
// with four entries, the last one repeating $y. No selection pattern.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_I8_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_I8_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP
    : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_I8_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO
    : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// Single-result surface load template with coordinates $x, $y, $z (all
// Int32Regs); the coordinate vector is printed with four entries, the last
// one repeating $z. No selection pattern is attached.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

defm
SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}

// Every record defined inside this scope gets IsSuld = 2. All templates in
// the scope produce two results ($r, $g).
// NOTE(review): the meaning of the numeric IsSuld values is defined where the
// field is declared, which is not in this part of the file - confirm there.
let IsSuld = 2 in {

// Two-result surface load template with a single Int32Regs coordinate $x;
// the `surf` dag supplies $s. No selection pattern is attached.
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V2I8_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP
    : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP
    : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO :
SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO
    : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// Two-result surface load template with coordinates $l and $x (both
// Int32Regs); the `surf` dag supplies $s. No selection pattern is attached.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO
    : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    :
NVPTXInst<(outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
          []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V2I8_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP
    : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP
    : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO
    : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;

// Two-result surface load template with coordinates $l, $x, $y (all
// Int32Regs); the coordinate vector is printed with four entries, the last
// one repeating $y. No selection pattern is attached.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP
    :
SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// Two-result surface load template with coordinates $x, $y, $z (all
// Int32Regs); the coordinate vector is printed with four entries, the last
// one repeating $z. No selection pattern is attached.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP
    : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP
    : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP
    : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP
    : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP
    : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP
    : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP :
SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP
    : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO
    : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO
    : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO
    : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO
    : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

}

// Every record defined inside this scope gets IsSuld = 3. All templates in
// the scope produce four results ($r, $g, $b, $a), and only b8/b16/b32
// variants are instantiated (there are no b64 records here).
// NOTE(review): the meaning of the numeric IsSuld values is defined where the
// field is declared, which is not in this part of the file - confirm there.
let IsSuld = 3 in {

// Four-result surface load template with a single Int32Regs coordinate $x;
// the `surf` dag supplies $s. No selection pattern is attached.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V4I8_CLAMP
    : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP
    : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP
    : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP
    : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP
    : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP
    : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO
    : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO
    : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO
    : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// Four-result surface load template with coordinates $l and $x (both
// Int32Regs); the `surf` dag supplies $s. No selection pattern is attached.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                []>;
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def
_R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// Four-result surface load template with coordinates $x and $y (both
// Int32Regs); the `surf` dag supplies $s. No selection pattern is attached.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V4I8_CLAMP
    : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP
    : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP
    : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP
    : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP
    : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP
    : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO :
SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO
    : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO
    : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// Four-result surface load template with coordinates $l, $x, $y (all
// Int32Regs); the coordinate vector is printed with four entries, the last
// one repeating $y. No selection pattern is attached.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// _R: $s is an Int64Regs operand; _I: $s is an i64imm operand.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// Four-result surface load template with coordinates $x, $y, $z (all
// Int32Regs); the coordinate vector is printed with four entries, the last
// one repeating $z. No selection pattern is attached.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins
Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V4I8_CLAMP
    : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP
    : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP
    : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP
    : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP
    : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP
    : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO
    : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO
    : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO
    : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

}

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query exists in two forms: _R takes the handle $a as an Int64Regs
// operand, _I takes it as an i64imm operand. Both write an Int32Regs result
// $d and carry no selection pattern of their own; the _R forms are selected
// through the explicit Pat records that follow this scope.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_order.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_order.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_data_type.b32 \t$d, [$a];",
                []>;
def TXQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_data_type.b32 \t$d, [$a];",
                []>;
def TXQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.width.b32 \t$d, [$a];",
                []>;
def TXQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.width.b32 \t$d, [$a];",
                []>;
def TXQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.height.b32 \t$d, [$a];",
                []>;
def TXQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.height.b32 \t$d, [$a];",
                []>;
def TXQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.depth.b32 \t$d, [$a];",
                []>;
def TXQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.depth.b32 \t$d, [$a];",
                []>;
def TXQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.array_size.b32 \t$d, [$a];",
                []>;
def TXQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.array_size.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_SAMPLES_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_samples.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_SAMPLES_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_samples.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_MIPMAP_LEVELS_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];",
                []>;
def TXQ_NUM_MIPMAP_LEVELS_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];",
                []>;
}

// Map each int_nvvm_txq_* intrinsic on a register handle to its _R record.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

let IsSurfTexQuery =
true in {
// Each query exists in two forms: _R takes the handle $a as an Int64Regs
// operand, _I takes it as an i64imm operand. Both write an Int32Regs result
// $d and carry no selection pattern of their own.
def SUQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_order.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_order.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_data_type.b32 \t$d, [$a];",
                []>;
def SUQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_data_type.b32 \t$d, [$a];",
                []>;
def SUQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.width.b32 \t$d, [$a];",
                []>;
def SUQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.width.b32 \t$d, [$a];",
                []>;
def SUQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.height.b32 \t$d, [$a];",
                []>;
def SUQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.height.b32 \t$d, [$a];",
                []>;
def SUQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.depth.b32 \t$d, [$a];",
                []>;
def SUQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.depth.b32 \t$d, [$a];",
                []>;
def SUQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.array_size.b32 \t$d, [$a];",
                []>;
def SUQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.array_size.b32 \t$d, [$a];",
                []>;
}

// Map each int_nvvm_suq_* intrinsic on a register handle to its _R record.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
(SUQ_DEPTH_R Int64Regs:$a)>; 4394def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), 4395 (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; 4396 4397 4398//===- Handle Query -------------------------------------------------------===// 4399 4400// TODO: These intrinsics are not yet finalized, pending PTX ISA design work 4401def ISTYPEP_SAMPLER 4402 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4403 "istypep.samplerref \t$d, $a;", 4404 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; 4405def ISTYPEP_SURFACE 4406 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4407 "istypep.surfref \t$d, $a;", 4408 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; 4409def ISTYPEP_TEXTURE 4410 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4411 "istypep.texref \t$d, $a;", 4412 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; 4413 4414//===- Surface Stores -----------------------------------------------------===// 4415 4416let IsSust = true in { 4417 4418class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf> 4419 : NVPTXInst<(outs), 4420 !con(surf, (ins Int32Regs:$x, intype:$r)), 4421 inst # " \t[$s, \\{$x\\}], \\{$r\\};", 4422 []>; 4423multiclass SUST_1D<string inst, NVPTXRegClass intype> { 4424 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>; 4425 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>; 4426} 4427 4428defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; 4429defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; 4430defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; 4431defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; 4432 4433defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; 4434defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; 4435defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; 4436defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; 4437 4438defm SUST_B_1D_B8_ZERO : 
SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; 4439defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; 4440defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; 4441defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; 4442 4443defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; 4444defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; 4445defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; 4446 4447class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4448 : NVPTXInst<(outs), 4449 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), 4450 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", 4451 []>; 4452multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { 4453 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4454 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; 4455} 4456 4457defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; 4458defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; 4459defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; 4460defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; 4461 4462defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; 4463defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; 4464defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; 4465defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; 4466 4467defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; 4468defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; 4469defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; 4470defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; 4471 4472defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; 4473defm SUST_P_1D_V2B16_TRAP : 
SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; 4474defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; 4475 4476class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4477 : NVPTXInst<(outs), 4478 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, 4479 intype:$b, intype:$a)), 4480 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4481 []>; 4482multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { 4483 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4484 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; 4485} 4486 4487defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; 4488defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; 4489defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 4490 4491defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 4492defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 4493defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 4494 4495defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 4496defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 4497defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 4498 4499defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 4500defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; 4501defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 4502 4503class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4504 : NVPTXInst<(outs), 4505 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 4506 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4507 []>; 4508multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 4509 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4510 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4511} 4512 
// Instantiations of the single-element 1D array store (_R and _I forms each).
defm SUST_B_1D_ARRAY_B8_CLAMP
    : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP
    : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP
    : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP
    : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_TRAP
    : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP
    : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP
    : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP
    : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_B8_ZERO
    : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO
    : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO
    : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO
    : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_B8_TRAP
    : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP
    : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP
    : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;

// 1D array store, two data elements ($r, $g).  `surf` supplies the surface
// operand and is prepended to the coordinate/data operands with !con.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
                                intype:$r, intype:$g)),
                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
                []>;
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_ARRAY_V2B8_CLAMP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_TRAP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

defm SUST_B_1D_ARRAY_V2B8_ZERO
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_ZERO
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO
    : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

defm SUST_P_1D_ARRAY_V2B8_TRAP
    : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP
    : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP
    : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;

// 1D array store, four data elements ($r, $g, $b, $a).
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
                                intype:$r, intype:$g, intype:$b, intype:$a)),
                inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
                []>;
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_ARRAY_V4B8_CLAMP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_TRAP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

defm SUST_B_1D_ARRAY_V4B8_ZERO
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO
    : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

defm SUST_P_1D_ARRAY_V4B8_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;

// 2D store, one data element; coordinates are ($x, $y).
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
                inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
                []>;
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;

// 2D store, two data elements ($r, $g).
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
                                intype:$r, intype:$g)),
                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
                []>;
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;

// 2D store, four data elements ($r, $g, $b, $a).
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
                                intype:$r, intype:$g, intype:$b, intype:$a)),
                inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
                []>;
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;

// 2D array store, one data element.  The printed coordinate list has four
// entries with $y repeated as the last one.
// NOTE(review): presumably padding to the 4-element coordinate vector the
// a2d geometry expects -- confirm against the PTX ISA before changing.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                intype:$r)),
                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
                []>;
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_B8_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_B8_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;

// 2D array store, two data elements ($r, $g); same 4-entry coordinate list.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                intype:$r, intype:$g)),
                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
                []>;
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V2B8_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;

// 2D array store, four data elements ($r, $g, $b, $a); same 4-entry
// coordinate list.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                                intype:$r, intype:$g, intype:$b, intype:$a)),
                inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
                []>;
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

// Instantiations of the four-element 2D array store (_R and _I forms each).
defm SUST_B_2D_ARRAY_V4B8_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;

// 3D store, one data element.  `surf` supplies the surface operand and is
// prepended to the coordinate/data operands with !con.  The printed
// coordinate list has four entries with $z repeated as the last one.
// NOTE(review): presumably padding to the 4-element coordinate vector the
// 3d geometry expects -- confirm against the PTX ISA before changing.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                                intype:$r)),
                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
                []>;
// _R: surface handle in a 64-bit register; _I: surface handle as a 64-bit
// immediate.
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;

// 3D store, two data elements ($r, $g).  The printed coordinate list has
// four entries with $z repeated as the last one.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                                intype:$r, intype:$g)),
                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
                []>;
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;

// 3D store, four data elements ($r, $g, $b, $a); same 4-entry coordinate
// list.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                                intype:$r, intype:$g, intype:$b, intype:$a)),
                inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
                []>;
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;

}

// Surface store instruction patterns.
// These are written as stand-alone Pat records rather than inside the
// instruction definitions: the original author notes that TableGen reports
// type errors when they are attached there, and the cause was not identified.
// Every pattern selects the register (_R) form of the instruction.

// .clamp variant

// 1D: surface $s, coordinate $x, then the data operands.
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 1D array: surface $s, layer index $l, coordinate $x, then the data operands.
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 2D: surface $s, coordinates $x and $y, then the data operands.
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_clamp
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
              Int64Regs:$r)>;

// Selection patterns for int_nvvm_sust_b_* intrinsics.
//
// Each anonymous Pat matches the intrinsic named in its first dag and emits
// the SUST_B_* instruction named in its second dag, forwarding every operand
// unchanged and in the same order.  Operands are laid out as:
//   line 1: $s (Int64Regs) followed by the Int32Regs operands ($l, $x, $y, $z)
//   line 2: the data operands ($r, $g, $b, $a)
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the instruction definitions.
// The 8-bit and 16-bit element forms both use Int16Regs data operands.

// int_nvvm_sust_b_2d_*_clamp -> SUST_B_2D_*_CLAMP_R (v2 and v4 forms)
def : Pat<
  (int_nvvm_sust_b_2d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_2d_array_*_clamp -> SUST_B_2D_ARRAY_*_CLAMP_R
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_clamp
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_3d_*_clamp -> SUST_B_3D_*_CLAMP_R
def : Pat<
  (int_nvvm_sust_b_3d_i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .trap variant

// int_nvvm_sust_b_1d_*_trap -> SUST_B_1D_*_TRAP_R
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_1d_array_*_trap -> SUST_B_1D_ARRAY_*_TRAP_R
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_2d_*_trap -> SUST_B_2D_*_TRAP_R
def : Pat<
  (int_nvvm_sust_b_2d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_2d_array_*_trap -> SUST_B_2D_ARRAY_*_TRAP_R
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_3d_*_trap -> SUST_B_3D_*_TRAP_R
def : Pat<
  (int_nvvm_sust_b_3d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .zero variant

// int_nvvm_sust_b_1d_*_zero -> SUST_B_1D_*_ZERO_R
def : Pat<
  (int_nvvm_sust_b_1d_i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_zero
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_1d_array_*_zero -> SUST_B_1D_ARRAY_*_ZERO_R
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_2d_*_zero -> SUST_B_2D_*_ZERO_R (i8 through v2i8)
def : Pat<
  (int_nvvm_sust_b_2d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

// Selection patterns for int_nvvm_sust_b_*_zero and int_nvvm_sust_p_*_trap
// intrinsics.
//
// Each anonymous Pat matches the intrinsic named in its first dag and emits
// the SUST_* instruction named in its second dag, forwarding every operand
// unchanged and in the same order.  Operands are laid out as:
//   line 1: $s (Int64Regs) followed by the Int32Regs operands ($l, $x, $y, $z)
//   line 2: the data operands ($r, $g, $b, $a)
// NOTE(review): $s is presumably the surface handle, $l the array layer and
// $x/$y/$z the coordinates -- confirm against the instruction definitions.
// The 8-bit and 16-bit element forms both use Int16Regs data operands.

// int_nvvm_sust_b_2d_*_zero -> SUST_B_2D_*_ZERO_R (v2i16 through v4i32)
def : Pat<
  (int_nvvm_sust_b_2d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_2d_array_*_zero -> SUST_B_2D_ARRAY_*_ZERO_R
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_ZERO_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_b_3d_*_zero -> SUST_B_3D_*_ZERO_R
def : Pat<
  (int_nvvm_sust_b_3d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_ZERO_R
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// int_nvvm_sust_p_1d_*_trap -> SUST_P_1D_*_TRAP_R
// (no 64-bit element forms appear in this group)
def : Pat<
  (int_nvvm_sust_p_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_P_1D_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_1d_array_*_trap -> SUST_P_1D_ARRAY_*_TRAP_R
def : Pat<
  (int_nvvm_sust_p_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_ARRAY_B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_ARRAY_B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_P_1D_ARRAY_B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_ARRAY_V2B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B8_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B16_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_ARRAY_V4B32_TRAP_R
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def :
Pat<(int_nvvm_sust_p_2d_i8_trap 6023 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6024 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6025 Int16Regs:$r)>; 6026 6027def : Pat<(int_nvvm_sust_p_2d_i16_trap 6028 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6029 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6030 Int16Regs:$r)>; 6031 6032def : Pat<(int_nvvm_sust_p_2d_i32_trap 6033 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6034 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6035 Int32Regs:$r)>; 6036 6037def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 6038 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6039 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6040 Int16Regs:$r, Int16Regs:$g)>; 6041 6042def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 6043 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6044 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6045 Int16Regs:$r, Int16Regs:$g)>; 6046 6047def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 6048 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6049 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6050 Int32Regs:$r, Int32Regs:$g)>; 6051 6052def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 6053 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6054 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6055 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6056 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6057 6058def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 6059 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6060 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6061 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6062 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6063 6064def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 6065 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6066 Int32Regs:$r, Int32Regs:$g, 
Int32Regs:$b, Int32Regs:$a), 6067 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6068 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6069 6070 6071 6072def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 6073 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6074 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 6075 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6076 Int16Regs:$r)>; 6077 6078def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 6079 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6080 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 6081 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6082 Int16Regs:$r)>; 6083 6084def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 6085 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6086 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 6087 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6088 Int32Regs:$r)>; 6089 6090def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 6091 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6092 Int16Regs:$r, Int16Regs:$g), 6093 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 6094 Int32Regs:$x, Int32Regs:$y, 6095 Int16Regs:$r, Int16Regs:$g)>; 6096 6097def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 6098 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6099 Int16Regs:$r, Int16Regs:$g), 6100 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 6101 Int32Regs:$x, Int32Regs:$y, 6102 Int16Regs:$r, Int16Regs:$g)>; 6103 6104def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 6105 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6106 Int32Regs:$g), 6107 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6108 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6109 6110def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 6111 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6112 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6113 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 6114 Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y, 6115 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6116 6117def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 6118 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6119 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6120 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 6121 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6122 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6123 6124def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 6125 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6126 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6127 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6128 Int32Regs:$x, Int32Regs:$y, 6129 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6130 6131 6132 6133def : Pat<(int_nvvm_sust_p_3d_i8_trap 6134 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6135 Int16Regs:$r), 6136 (SUST_P_3D_B8_TRAP_R Int64Regs:$s, 6137 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6138 Int16Regs:$r)>; 6139 6140def : Pat<(int_nvvm_sust_p_3d_i16_trap 6141 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6142 Int16Regs:$r), 6143 (SUST_P_3D_B16_TRAP_R Int64Regs:$s, 6144 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6145 Int16Regs:$r)>; 6146 6147def : Pat<(int_nvvm_sust_p_3d_i32_trap 6148 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6149 Int32Regs:$r), 6150 (SUST_P_3D_B32_TRAP_R Int64Regs:$s, 6151 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6152 Int32Regs:$r)>; 6153 6154def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 6155 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6156 Int16Regs:$r, Int16Regs:$g), 6157 (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s, 6158 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6159 Int16Regs:$r, Int16Regs:$g)>; 6160 6161def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 6162 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6163 Int16Regs:$r, Int16Regs:$g), 6164 (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s, 6165 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6166 
Int16Regs:$r, Int16Regs:$g)>; 6167 6168def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 6169 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6170 Int32Regs:$r, Int32Regs:$g), 6171 (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s, 6172 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6173 Int32Regs:$r, Int32Regs:$g)>; 6174 6175def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 6176 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6177 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6178 (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s, 6179 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6180 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6181 6182def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 6183 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6184 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6185 (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s, 6186 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6187 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6188 6189def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 6190 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6191 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6192 (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s, 6193 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6194 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6195 6196//----------------------------------- 6197// Read Special Registers 6198//----------------------------------- 6199 6200class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]> 6201 : NVPTXInst<(outs Int64Regs:$d), (ins), 6202 !strconcat("mov.u64 \t$d, %", regname, ";"), 6203 [(set Int64Regs:$d, (intop))]>, 6204 Requires<Preds>; 6205 6206class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]> 6207 : NVPTXInst<(outs Int32Regs:$d), (ins), 6208 !strconcat("mov.u32 \t$d, %", regname, ";"), 6209 [(set Int32Regs:$d, (intop))]>, 6210 Requires<Preds>; 6211 6212multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> { 6213 foreach suffix = ["x", 
"y", "z", "w"] in { 6214 defvar reg = regname # "." # suffix; 6215 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix); 6216 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>; 6217 } 6218} 6219 6220// TODO Add read vector-version of special registers 6221 6222defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">; 6223defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">; 6224defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">; 6225defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">; 6226 6227defm INT_PTX_SREG_CLUSTERID : 6228 PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>; 6229defm INT_PTX_SREG_NCLUSTERID : 6230 PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>; 6231defm INT_PTX_SREG_CLUSTER_CTAID : 6232 PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>; 6233defm INT_PTX_SREG_CLUSTER_NCTAID: 6234 PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>; 6235 6236def INT_PTX_SREG_CLUSTER_CTARANK : 6237 PTX_READ_SREG_R32<"cluster_ctarank", 6238 int_nvvm_read_ptx_sreg_cluster_ctarank, 6239 [hasSM<90>, hasPTX<78>]>; 6240def INT_PTX_SREG_CLUSTER_NCTARANK: 6241 PTX_READ_SREG_R32<"cluster_nctarank", 6242 int_nvvm_read_ptx_sreg_cluster_nctarank, 6243 [hasSM<90>, hasPTX<78>]>; 6244 6245 6246def INT_PTX_SREG_LANEID : 6247 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 6248def INT_PTX_SREG_WARPID : 6249 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 6250def INT_PTX_SREG_NWARPID : 6251 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 6252def INT_PTX_SREG_SMID : 6253 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 6254def INT_PTX_SREG_NSMID : 6255 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 6256def INT_PTX_SREG_GRIDID : 6257 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 6258 6259def INT_PTX_SREG_LANEMASK_EQ : 6260 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 6261def INT_PTX_SREG_LANEMASK_LE : 6262 
PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 6263def INT_PTX_SREG_LANEMASK_LT : 6264 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 6265def INT_PTX_SREG_LANEMASK_GE : 6266 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 6267def INT_PTX_SREG_LANEMASK_GT : 6268 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 6269 6270def INT_PTX_SREG_CLOCK : 6271 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 6272def INT_PTX_SREG_CLOCK64 : 6273 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 6274 6275def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 6276def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 6277def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 6278def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 6279 6280// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 6281// handle the constant. 6282def INT_PTX_SREG_WARPSIZE : 6283 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 6284 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 6285 6286// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 6287// In addition to target-independent fields provided by WMMA_REGS, it adds 6288// the fields commonly used to implement specific PTX instruction -- register 6289// types and names, constraints, parts of assembly, etc. 6290class WMMA_REGINFO<WMMA_REGS r, string op> 6291 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> { 6292 // NVPTX register types used to carry fragment data. 
6293 NVPTXRegClass regclass = !cond( 6294 !eq(ptx_elt_type, "f16") : Int32Regs, 6295 !eq(ptx_elt_type, "f32") : Float32Regs, 6296 !eq(ptx_elt_type, "f64") : Float64Regs, 6297 !eq(ptx_elt_type, "bf16") : Int32Regs, 6298 !eq(ptx_elt_type, "tf32") : Int32Regs, 6299 !eq(ptx_elt_type, "s32") : Int32Regs, 6300 !eq(ptx_elt_type, "b16") : Int32Regs, 6301 !eq(ptx_elt_type, "s8") : Int32Regs, 6302 !eq(ptx_elt_type, "u8") : Int32Regs, 6303 !eq(ptx_elt_type, "s4") : Int32Regs, 6304 !eq(ptx_elt_type, "u4") : Int32Regs, 6305 !eq(ptx_elt_type, "b1") : Int32Regs); 6306 6307 // Instruction input/output arguments for the fragment. 6308 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs)); 6309 6310 // List of register names for the fragment -- ["ra0", "ra1",...] 6311 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret; 6312 6313 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction. 6314 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}"; 6315 6316 // Predicates for particular fragment variant. Technically those are 6317 // per-instruction predicates, but currently all fragments that can be used in 6318 // a given instruction are subject to the same constraints, so an instruction 6319 // can use predicates from any of its fragments. If/when this is no 6320 // longer the case, we can concat all per-fragment predicates to enforce that 6321 // all fragments of the instruction are viable. 
6322 list<Predicate> Predicates = !cond( 6323 // fp16 -> fp16/fp32 @ m16n16k16 6324 !and(!eq(geom, "m16n16k16"), 6325 !or(!eq(ptx_elt_type, "f16"), 6326 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>], 6327 6328 !and(!eq(geom,"m8n8k4"), 6329 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>], 6330 6331 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16 6332 !and(!or(!eq(geom, "m8n32k16"), 6333 !eq(geom, "m32n8k16")), 6334 !or(!eq(ptx_elt_type, "f16"), 6335 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>], 6336 6337 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 6338 !and(!or(!eq(geom,"m16n16k16"), 6339 !eq(geom,"m8n32k16"), 6340 !eq(geom,"m32n8k16")), 6341 !or(!eq(ptx_elt_type, "u8"), 6342 !eq(ptx_elt_type, "s8"), 6343 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>], 6344 6345 !and(!or(!eq(geom,"m16n16k16"), 6346 !eq(geom,"m8n32k16"), 6347 !eq(geom,"m32n8k16")), 6348 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>], 6349 6350 !and(!eq(geom,"m16n16k8"), 6351 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>], 6352 6353 !and(!eq(geom,"m16n16k8"), 6354 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>], 6355 6356 // b1 -> s32 @ m8n8k128(b1) 6357 !and(!ne(op,"mma"), 6358 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>], 6359 6360 // u4/s4 -> s32 @ m8n8k32 (u4/s4) 6361 !and(!ne(op,"mma"), 6362 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>], 6363 6364 !or(!eq(geom,"m16n8k8"), 6365 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>], 6366 6367 !and(!ne(ptx_elt_type,"f64"), 6368 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>], 6369 6370 // mma m8n8k32 requires higher PTX version 6371 !and(!eq(op,"mma"), 6372 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>], 6373 6374 !and(!eq(ptx_elt_type,"f64"), 6375 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>], 6376 6377 !and(!eq(op,"mma"), 6378 !or(!eq(geom, "m16n8k16"), 6379 !eq(geom, "m16n8k4"), 6380 !eq(geom, "m16n8k32"), 6381 !eq(geom, "m16n8k64"), 6382 !eq(geom, "m8n8k128"), 6383 !eq(geom, "m16n8k128"), 6384 !eq(geom, 
"m16n8k256"))) : [hasSM<80>, hasPTX<70>], 6385 6386 !and(!eq(op,"ldmatrix"), 6387 !eq(ptx_elt_type,"b16"), 6388 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]); 6389 6390 // template DAGs for instruction inputs/output. 6391 dag Outs = !dag(outs, ptx_regs, reg_names); 6392 dag Ins = !dag(ins, ptx_regs, reg_names); 6393} 6394 6395// Convert dag of arguments into a dag to match given intrinsic. 6396class BuildPatternI<Intrinsic Intr, dag Ins> { 6397 // Build a dag pattern that matches the intrinsic call. 6398 dag ret = !foreach(tmp, Ins, 6399 !subst(imem, ADDRvar, 6400 !subst(MEMri64, ADDRri64, 6401 !subst(MEMri, ADDRri, 6402 !subst(ins, Intr, tmp))))); 6403} 6404 6405// Same as above, but uses PatFrag instead of an Intrinsic. 6406class BuildPatternPF<PatFrag Intr, dag Ins> { 6407 // Build a dag pattern that matches the intrinsic call. 6408 dag ret = !foreach(tmp, Ins, 6409 !subst(imem, ADDRvar, 6410 !subst(MEMri64, ADDRri64, 6411 !subst(MEMri, ADDRri, 6412 !subst(ins, Intr, tmp))))); 6413} 6414 6415// Common WMMA-related fields used for building patterns for all MMA instructions. 6416class WMMA_INSTR<string _Intr, list<dag> _Args> 6417 : NVPTXInst<(outs), (ins), "?", []> { 6418 Intrinsic Intr = !cast<Intrinsic>(_Intr); 6419 // Concatenate all arguments into a single dag. 6420 dag Args = !foldl((ins), _Args, a, b, !con(a,b)); 6421 // Pre-build the pattern to match (intrinsic arg0, arg1, ...). 6422 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret; 6423} 6424 6425// 6426// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 6427// 6428 6429class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride, 6430 DAGOperand SrcOp> 6431 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record, 6432 [!con((ins SrcOp:$src), 6433 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 6434 Requires<Frag.Predicates> { 6435 // Load/store intrinsics are overloaded on pointer's address space. 
6436 // To match the right intrinsic, we need to build AS-constrained PatFrag. 6437 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 6438 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); 6439 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src)); 6440 // Build PatFrag that only matches particular address space. 6441 PatFrag IntrFrag = PatFrag<PFOperands, 6442 PFOperandsIntr, 6443 !cond(!eq(Space, ".shared"): AS_match.shared, 6444 !eq(Space, ".global"): AS_match.global, 6445 true: AS_match.generic)>; 6446 // Build AS-constrained pattern. 6447 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 6448 6449 let OutOperandList = Frag.Outs; 6450 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 6451 let AsmString = "wmma.load." 6452 # Frag.frag 6453 # ".sync" 6454 # "${ptx:aligned}" 6455 # "." # Layout 6456 # "." # Frag.geom 6457 # Space 6458 # "." # Frag.ptx_elt_type # " \t" 6459 # Frag.regstring 6460 # ", [$src]" 6461 # !if(WithStride, ", $ldm", "") 6462 # ";"; 6463} 6464 6465// 6466// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 6467// 6468class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, 6469 bit WithStride, DAGOperand DstOp> 6470 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record, 6471 [!con((ins DstOp:$dst), 6472 Frag.Ins, 6473 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 6474 Requires<Frag.Predicates> { 6475 6476 // Load/store intrinsics are overloaded on pointer's address space. 6477 // To match the right intrinsic, we need to build AS-constrained PatFrag. 6478 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 6479 dag PFOperands = !con((ops node:$dst), 6480 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names), 6481 !if(WithStride, (ops node:$ldm), (ops))); 6482 // Build PatFrag that only matches particular address space. 
6483 PatFrag IntrFrag = PatFrag<PFOperands, 6484 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), 6485 !cond(!eq(Space, ".shared"): AS_match.shared, 6486 !eq(Space, ".global"): AS_match.global, 6487 true: AS_match.generic)>; 6488 // Build AS-constrained pattern. 6489 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 6490 6491 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 6492 let OutOperandList = (outs); 6493 let AsmString = "wmma.store.d.sync" 6494 # "${ptx:aligned}" 6495 # "." # Layout 6496 # "." # Frag.geom 6497 # Space 6498 # "." # Frag.ptx_elt_type 6499 # " \t[$dst]," 6500 # Frag.regstring 6501 # !if(WithStride, ", $ldm", "") 6502 # ";"; 6503} 6504 6505// Create all load/store variants 6506defset list<WMMA_INSTR> MMA_LDSTs = { 6507 foreach layout = ["row", "col"] in { 6508 foreach stride = [false, true] in { 6509 foreach space = [".global", ".shared", ""] in { 6510 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { 6511 foreach frag = NVVM_MMA_OPS.all_ld_ops in 6512 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 6513 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>; 6514 foreach frag = NVVM_MMA_OPS.all_st_ops in 6515 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 6516 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>; 6517 } // addr 6518 } // space 6519 } // stride 6520 } // layout 6521} // defset 6522 6523// B1 instruction variants need extra constraints. 
// Predicate list for a WMMA/MMA instruction: the predicates of fragment A,
// extended with [hasSM<80>, hasPTX<71>] when the b1 operation is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !if(!eq(b1op, ".and.popc"),
                            !listconcat(FragA.Predicates,
                                        [hasSM<80>,hasPTX<71>]),
                            FragA.Predicates);
}

// WMMA.MMA
//
// FragA/FragB/FragC supply the input registers and FragD the results.
// ALayout/BLayout, rnd and b1op are pasted into the asm string; a non-zero
// Satfinite appends ".satfinite".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires<> on an Instruction without Patterns appears to have no effect
    // by itself. It is still set here so that the Pat<> built from this
    // record can pick the predicates up.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // f16 inputs print only the D and C types; every other input type prints
  // all four fragment types in D, A, B, C order.
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        "." # FragD.ptx_elt_type
                        # "." # FragC.ptx_elt_type,
                        "." # FragD.ptx_elt_type
                        # "." # FragA.ptx_elt_type
                        # "." # FragB.ptx_elt_type
                        # "." # FragC.ptx_elt_type);
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate every wmma.mma variant accepted by NVVM_WMMA_SUPPORTED and
// collect the records in WMMAs.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                               WMMA_REGINFO<op[1], "wmma.mma">,
                               WMMA_REGINFO<op[2], "wmma.mma">,
                               WMMA_REGINFO<op[3], "wmma.mma">,
                               layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
//
// Same fragment roles as WMMA_MMA. There is no rounding-mode argument, all
// four fragment types are always printed, and b1op is printed after the
// type list.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires<> on an Instruction without Patterns appears to have no effect
    // by itself. It is still set here so that the Pat<> built from this
    // record can pick the predicates up.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate every mma variant accepted by NVVM_MMA_SUPPORTED and collect
// the records in MMAs.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
//
// Frag supplies the result registers, geometry, fragment name and element
// type; Transposed adds ".trans"; Space is pasted into the asm and selects
// the address-space check (shared for ".shared", generic otherwise).
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag restricted to the requested address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !if(!eq(Space, ".shared"),
                                 AS_match.shared,
                                 AS_match.generic)>;
  // Pattern built on top of the address-space constrained fragment.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
//
// The source pattern is the instruction's pre-built IntrinsicPattern; the
// result is (wi <args...> ptx.version), guarded by the instruction's own
// predicates.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in {
  def : MMA_PAT<mma>;
}

// mapa instruction variants for intrinsic `Intr`; `suffix` is pasted right
// after the "mapa" mnemonic.
//   _32 / _32i : 32-bit $d and $a, with $b as a register / immediate.
//   _64 / _64i : 64-bit $d and $a, with $b as a register / immediate.
// All variants are gated on [hasSM<90>, hasPTX<78>].
multiclass MAPA<string suffix, Intrinsic Intr> {
  defvar asm_prefix = "mapa" # suffix;

  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
              asm_prefix # ".u32\t$d, $a, $b;",
              [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
              asm_prefix # ".u32\t$d, $a, $b;",
              [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
              asm_prefix # ".u64\t$d, $a, $b;",
              [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
              asm_prefix # ".u64\t$d, $a, $b;",
              [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
}

defm mapa  : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;


// getctarank instruction variants for intrinsic `Intr`; `suffix` is pasted
// right after the "getctarank" mnemonic. _32 takes a 32-bit $a and _64 a
// 64-bit $a; the result $d is a 32-bit register in both cases.
// Both variants are gated on [hasSM<90>, hasPTX<78>].
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  defvar asm_prefix = "getctarank" # suffix;

  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
              asm_prefix # ".u32\t$d, $a;",
              [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              asm_prefix # ".u64\t$d, $a;",
              [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;
}

defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;

// Copies the %is_explicit_cluster special register into a predicate register.
def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
              "mov.pred\t$d, %is_explicit_cluster;",
              [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;