//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the constant and
// compares it against the literal shown in the code fragment.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Code fragments that check the address space of a memory SDNode via
// ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}

// Values of `threadmask_imm` to instantiate SHFL_INSTR with. Sync variants get
// both the register (0) and immediate (1) threadmask forms; non-sync variants
// have no threadmask operand, so a single value is enough.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
      [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
      [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// The barrier-with-reduction forms below turn the i32 $pred into a predicate
// register with setp.ne, then run bar.red on barrier 0. The and/or forms
// convert the resulting predicate back into an i32 0/1 with selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// The *_I / *_R (and *_II / *_IR / *_RI / *_RR) suffixes below select
// immediate vs. register forms of the corresponding operands.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;

// One shfl / shfl.sync instruction variant.
//   sync           - emit shfl.sync and take a leading $threadmask operand.
//   mode           - "up", "down", "bfly" or "idx".
//   reg            - "i32" or "f32"; selects the register class of $src/$dst.
//   return_pred    - also produce the Int1Regs $pred result ("...p" intrinsic).
//   offset_imm, mask_imm, threadmask_imm
//                  - 1 selects an i32imm operand, 0 an Int32Regs operand.
// The operand lists, asm string and selection pattern are all derived from
// these flags; the intrinsic is looked up by its constructed name.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Build (set <outs...>, (Intr <ins...>)) from the operand lists, replacing
  // i32imm operands with `imm` so immediates are matched in the pattern.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
        !subst(outs, set,
        !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of SHFL_INSTR flags.
// shfl.sync is a PTX ISA 6.0 instruction, so the sync variants are gated on
// hasPTX60 like the other *.sync instructions in this file; the legacy
// non-sync variants are gated on hasSHFL instead.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` as a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode #" \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync: one def per immediate/register combination of the
// $mask and $value operands.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync with the additional predicate result ($dest|$pred).
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Async Copy Functions
//-----------------------------------

// In the multiclasses below, _32 and _64 take the address operand(s) in
// Int32Regs and Int64Regs respectively.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;

multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX70, hasSM80]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN
//   max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// Emits the four operand orderings of fmin(1, fmax(0, x)) for one type and
// selects each of them to a saturating convert. Both intrinsics take their
// operands in either order, so every combination is spelled out.
multiclass SAT_FROM_MIN_MAX<Intrinsic FMin, Intrinsic FMax, PatLeaf Zero,
                            PatLeaf One, NVPTXRegClass RC, Instruction Cvt> {
  def : Pat<(FMin One, (FMax Zero, RC:$a)), (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin One, (FMax RC:$a, Zero)), (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin (FMax Zero, RC:$a), One), (Cvt RC:$a, CvtSAT)>;
  def : Pat<(FMin (FMax RC:$a, Zero), One), (Cvt RC:$a, CvtSAT)>;
}

// f32 first, then f64, matching the original definition order.
defm : SAT_FROM_MIN_MAX<int_nvvm_fmin_f, int_nvvm_fmax_f, immFloat0, immFloat1,
                        Float32Regs, CVT_f32_f32>;
defm : SAT_FROM_MIN_MAX<int_nvvm_fmin_d, int_nvvm_fmax_d, immDouble0,
                        immDouble1, Float64Regs, CVT_f64_f64>;


// Single-source math instruction: $dst = IntOP($src0).
// OpcStr is the complete assembly string rather than just a mnemonic, because
// some users need full control over it (e.g. INT_PTX_RECIP).
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source math instruction: $dst = IntOP($src0, $src1, $src2).
// As with the other F_MATH_* classes, OpcStr is the complete assembly string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_PRMT
  : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

// f32, with and without flush-to-zero.
def INT_NVVM_FMIN_F
  : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
  : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
  : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
  : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64.
def INT_NVVM_FMIN_D
  : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
  : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;


//
// Multiplication
//

// High half of a 32-bit multiply, signed and unsigned.
def INT_NVVM_MULHI_I
  : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
  : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// High half of a 64-bit multiply, signed and unsigned.
def INT_NVVM_MULHI_LL
  : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
  : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
             Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding mode (rn/rz/rm/rp), each with and
// without flush-to-zero.
def INT_NVVM_MUL_RN_FTZ_F
  : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
  : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
  : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
  : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
  : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
  : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
  : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
  : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding mode.
def INT_NVVM_MUL_RN_D
  : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
  : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
  : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
  : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// 24-bit multiply, low result, signed and unsigned.
def INT_NVVM_MUL24_I
  : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
  : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
             Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

// Approximate f32 divide.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
  : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding mode.
def INT_NVVM_DIV_RN_FTZ_F
  : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
  : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
  : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
  : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
  : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
  : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
  : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
  : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding mode.
def INT_NVVM_DIV_RN_D
  : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
  : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
  : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
  : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I
  : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
  : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
             Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//

// floor -> same-type cvt with the CvtRMI mode; ceil -> CvtRPI.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F
  : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
  : F_MATH_1<"abs.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
  : F_MATH_1<"abs.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

// round -> same-type cvt with the CvtRNI mode.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

// trunc -> same-type cvt with the CvtRZI mode.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

// saturate -> same-type cvt with the CvtSAT mode.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2  Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F
  : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
  : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
  : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F
  : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
  : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
  : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin  Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F
  : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
  : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
  : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
  : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// f32 fused multiply-add with an explicit rounding mode, each with and
// without flush-to-zero.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
  : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
  : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
  : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
  : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
             Float32Regs, Float32Regs, Float32Regs, Float32Regs,
             int_nvvm_fma_rp_f>;

// f64 fused multiply-add with an explicit rounding mode.
def INT_NVVM_FMA_RN_D
  : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
  : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
  : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
  : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
             Float64Regs, Float64Regs, Float64Regs, Float64Regs,
             int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F
  : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
// f32 rcp: one record per rounding mode, each with and without .ftz.
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 rcp: one record per rounding mode.
def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32 sqrt: rounded forms (rn/rz/rm/rp) and approx forms, each with and
// without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 sqrt: one record per rounding mode.
def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f is mapped onto one of the records above
// depending on the doF32FTZ and do_SQRTF32_RN predicates; the last pattern
// carries no predicate and selects sqrt.approx.f32. The patterns are kept in
// most-constrained-first order.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add: one record per rounding mode, each with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add: one record per rounding mode.
def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//
// Conversion intrinsics have no records of their own; each is selected to
// the matching CVT_<dst>_<src> instruction with a conversion-mode operand.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
// u32 -> f64, round towards +inf mode operand (CvtRP).
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32: integer-rounding conversion modes, each with an .ftz variant.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32: integer-rounding conversion modes, each with an .ftz variant.
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Packs two 32-bit registers into one 64-bit register with a vector mov.b64.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpack a 64-bit register with mov.b64 into {first, second}; the unwanted
// element goes to a scratch %temp register declared in a local scope.
// D2I_LO keeps the first element, D2I_HI the second.
def INT_NVVM_D2I_LO
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{$dst, %temp}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI
    : F_MATH_1<!strconcat("{{\n\t",
                          ".reg .b32 %temp; \n\t",
                          "mov.b64 \t{%temp, $dst}, $src0;\n\t",
                          "}}"),
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16: the CVT result is passed through BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast
//
// Same-width moves between the float and integer register classes.

def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Common shape of every fns.b32 record: a 32-bit result computed from
// $mask, $base and $offset. `ins` supplies the operand list and `Operands`
// the matching selection dag. Guarded by hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands)]>,
      Requires<[hasPTX60, hasSM30]>;

// One record per register/immediate combination of (mask, base, offset);
// the three-letter suffix gives r(egister) or i(mmediate) for each operand
// in that order.
def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                imm:$offset)>;
def INT_FNS_rir
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rii
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_iri
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic ("atom<space><op><type> $dst, [$addr], $b") for a single
// pointer register class. Emits a register-operand record (`reg`) and an
// immediate-operand record (`imm`); IMMType/IMM give the operand and SDNode
// used for the immediate form.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b;",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b))]>,
        Requires<Pred>;
  def imm
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b;",
                  [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
        Requires<Pred>;
}

// Instantiates F_ATOMIC_2_imp for both pointer widths (p32: Int32Regs
// address, p64: Int64Regs address).
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      SDNode IMM, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// Emits a scoped sequence: declare `temp`, negate $b into it, then issue the
// atomic with `temp` as the operand. Only a register-operand record is
// defined; IMMType is accepted but not used in the body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, Operand IMMType,
                              list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b),
                  "{{ \n\t" #
                  ".reg \t.s" # TypeStr # " temp; \n\t" #
                  "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
                  "atom" # SpaceStr # OpcStr # ".u" # TypeStr #
                      " \t$dst, [$addr], temp; \n\t" #
                  "}}",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b))]>,
        Requires<Pred>;
}

// Instantiates F_ATOMIC_2_NEG_imp for both pointer widths.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
                          string TypeStr, string OpcStr, PatFrag IntOp,
                          Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr,
                                OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr,
                                OpcStr, IntOp, IMMType, Pred>;
}

// has 3 operands
// Three-operand atomic ("atom<space><op><type> $dst, [$addr], $b, $c") for a
// single pointer register class. Four records cover the register/immediate
// combinations of ($b, $c): reg (r,r), imm1 (i,r), imm2 (r,i), imm3 (i,i).
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  def reg
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, regclass:$c),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b, $c;",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
        Requires<Pred>;

  def imm1
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b, $c;",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
        Requires<Pred>;

  def imm2
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b, $c;",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
        Requires<Pred>;

  def imm3
      : NVPTXInst<(outs regclass:$dst),
                  (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                  "atom" # SpaceStr # OpcStr # TypeStr #
                      " \t$dst, [$addr], $b, $c;",
                  [(set regclass:$dst,
                        (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
        Requires<Pred>;
}

// Instantiates F_ATOMIC_3_imp for both pointer widths.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-qualified fragments: _g = global, _s = shared, _gen = generic.
// The un-sized atomic_load_add_{g,s,gen} fragments wrap atomic_load_fadd.
def atomic_load_add_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_fadd node:$a, node:$b)>;

// The *_USE_G records match the generic fragment but print ".global".
defm INT_PTX_ATOM_ADD_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
                 atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
                 atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;

defm INT_PTX_ATOM_ADD_G_F32
    : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
                 atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
    : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
                 atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
    : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
                 atomic_load_add_gen, f32imm, fpimm>;

// The f64 forms are additionally guarded by hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64
    : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
                 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
    : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
                 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
    : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
                 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

def atomic_load_sub_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction is emitted through F_ATOMIC_2_NEG with the ".add" opcode
// string, i.e. negate the operand and then atomically add it.
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen, i64imm>;

// atom_swap

def atomic_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                 atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                 atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;

// atom_max

def atomic_load_max_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_64 node:$a, node:$b)>;

// Signed max uses the .s32/.s64 type strings, unsigned max .u32/.u64.
defm INT_PTX_ATOM_LOAD_MAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                 atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                 atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                 atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                 atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;

// atom_min

def atomic_load_min_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_64 node:$a, node:$b)>;

// Signed min uses the .s32/.s64 type strings, unsigned min .u32/.u64.
defm INT_PTX_ATOM_LOAD_MIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                 atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                 atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                 atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G :
F_ATOMIC_2<Int32Regs, ".global", 1518 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>; 1519defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1520 ".min", atomic_load_umin_64_g, i64imm, imm>; 1521defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", 1522 ".min", atomic_load_umin_64_s, i64imm, imm>; 1523defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min", 1524 atomic_load_umin_64_gen, i64imm, imm>; 1525defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1526 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>; 1527 1528// atom_inc atom_dec 1529 1530def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1531 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1532def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1533 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1534def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1535 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1536def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1537 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1538def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1539 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1540def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1541 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1542 1543defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc", 1544 atomic_load_inc_32_g, i32imm, imm>; 1545defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc", 1546 atomic_load_inc_32_s, i32imm, imm>; 1547defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc", 1548 atomic_load_inc_32_gen, i32imm, imm>; 1549defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1550 ".inc", atomic_load_inc_32_gen, i32imm, imm>; 1551defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", 
".dec", 1552 atomic_load_dec_32_g, i32imm, imm>; 1553defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec", 1554 atomic_load_dec_32_s, i32imm, imm>; 1555defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", 1556 atomic_load_dec_32_gen, i32imm, imm>; 1557defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1558 ".dec", atomic_load_dec_32_gen, i32imm, imm>; 1559 1560// atom_and 1561 1562def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1563 (atomic_load_and_32 node:$a, node:$b)>; 1564def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1565 (atomic_load_and_32 node:$a, node:$b)>; 1566def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1567 (atomic_load_and_32 node:$a, node:$b)>; 1568def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1569 (atomic_load_and_64 node:$a, node:$b)>; 1570def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1571 (atomic_load_and_64 node:$a, node:$b)>; 1572def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1573 (atomic_load_and_64 node:$a, node:$b)>; 1574 1575defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", 1576 atomic_load_and_32_g, i32imm, imm>; 1577defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", 1578 atomic_load_and_32_s, i32imm, imm>; 1579defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", 1580 atomic_load_and_32_gen, i32imm, imm>; 1581defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1582 ".and", atomic_load_and_32_gen, i32imm, imm>; 1583defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", 1584 atomic_load_and_64_g, i64imm, imm>; 1585defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", 1586 atomic_load_and_64_s, i64imm, imm>; 1587defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", 1588 
atomic_load_and_64_gen, i64imm, imm>; 1589defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1590 ".and", atomic_load_and_64_gen, i64imm, imm>; 1591 1592// atom_or 1593 1594def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1595 (atomic_load_or_32 node:$a, node:$b)>; 1596def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1597 (atomic_load_or_32 node:$a, node:$b)>; 1598def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1599 (atomic_load_or_32 node:$a, node:$b)>; 1600def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1601 (atomic_load_or_64 node:$a, node:$b)>; 1602def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1603 (atomic_load_or_64 node:$a, node:$b)>; 1604def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1605 (atomic_load_or_64 node:$a, node:$b)>; 1606 1607defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", 1608 atomic_load_or_32_g, i32imm, imm>; 1609defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or", 1610 atomic_load_or_32_gen, i32imm, imm>; 1611defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1612 ".or", atomic_load_or_32_gen, i32imm, imm>; 1613defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", 1614 atomic_load_or_32_s, i32imm, imm>; 1615defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", 1616 atomic_load_or_64_g, i64imm, imm>; 1617defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", 1618 atomic_load_or_64_gen, i64imm, imm>; 1619defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1620 ".or", atomic_load_or_64_gen, i64imm, imm>; 1621defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", 1622 atomic_load_or_64_s, i64imm, imm>; 1623 1624// atom_xor 1625 1626def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1627 (atomic_load_xor_32 
node:$a, node:$b)>; 1628def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1629 (atomic_load_xor_32 node:$a, node:$b)>; 1630def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1631 (atomic_load_xor_32 node:$a, node:$b)>; 1632def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1633 (atomic_load_xor_64 node:$a, node:$b)>; 1634def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1635 (atomic_load_xor_64 node:$a, node:$b)>; 1636def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1637 (atomic_load_xor_64 node:$a, node:$b)>; 1638 1639defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", 1640 atomic_load_xor_32_g, i32imm, imm>; 1641defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", 1642 atomic_load_xor_32_s, i32imm, imm>; 1643defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", 1644 atomic_load_xor_32_gen, i32imm, imm>; 1645defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1646 ".xor", atomic_load_xor_32_gen, i32imm, imm>; 1647defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", 1648 atomic_load_xor_64_g, i64imm, imm>; 1649defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", 1650 atomic_load_xor_64_s, i64imm, imm>; 1651defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", 1652 atomic_load_xor_64_gen, i64imm, imm>; 1653defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1654 ".xor", atomic_load_xor_64_gen, i64imm, imm>; 1655 1656// atom_cas 1657 1658def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1659 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1660def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1661 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1662def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1663 
(atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1664def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1665 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1666def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1667 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1668def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1669 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1670 1671defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1672 atomic_cmp_swap_32_g, i32imm>; 1673defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1674 atomic_cmp_swap_32_s, i32imm>; 1675defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1676 atomic_cmp_swap_32_gen, i32imm>; 1677defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1678 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1679defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1680 atomic_cmp_swap_64_g, i64imm>; 1681defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1682 atomic_cmp_swap_64_s, i64imm>; 1683defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1684 atomic_cmp_swap_64_gen, i64imm>; 1685defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1686 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1687 1688// Support for scoped atomic operations. Matches 1689// int_nvvm_atomic_{op}_{space}_{type}_{scope} 1690// and converts it into the appropriate instruction. 1691// NOTE: not all possible combinations are implemented 1692// 'space' is limited to generic as it's the only one needed to support CUDA. 1693// 'scope' = 'gpu' is default and is handled by regular atomic instructions. 
// Common base record for the scoped atomic instructions.
//   AsmStr   - assembly string of the instruction.
//   regclass - register class of the single output, $result.
//   Preds    - predicates attached through Requires.
//   ins      - input operand dag.
//   Operands - dag whose value the selection pattern assigns to $result.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
      : NVPTXInst<(outs regclass:$result), ins,
                  AsmStr,
                  [(set regclass:$result, Operands)]>,
        Requires<Preds>;

// Define instruction variants for all addressing modes.
// Two-operand form: four anonymous records, i.e. {Int32Regs, Int64Regs}
// address in $src crossed with {register, immediate} value in $b. The
// register forms carry AddedComplexity = 1; the immediate forms keep the
// default.
multiclass ATOM2P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand form (used for cas below): eight anonymous records, i.e.
// {Int32Regs, Int64Regs} address crossed with the four register/immediate
// combinations of $b and $c.
multiclass ATOM3P_impl<string AsmStr,  Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  // register/register forms: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  // Exactly one immediate ($b or $c): AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  // Both operands immediate: default complexity.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
// Asm string: "atom", then "." # SpaceStr unless SpaceStr is "gen", then
// "." # ScopeStr unless ScopeStr is "gpu", then "." # OpStr # "." # TypeStr
// and the operand list.
// Intrinsic: looked up by name via !cast as
// "int_nvvm_atomic_" # OpStr # "_" # SpaceStr # "_" # IntTypeStr, followed by
// "_" # ScopeStr unless ScopeStr is empty.
// NOTE(review): TypeStr is not part of the intrinsic name, so two
// instantiations that differ only in TypeStr match the same intrinsic.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Three-operand counterpart of ATOM2N_impl; the asm operand list additionally
// contains $c.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
   defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                            regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
   defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                            regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Both scopes append hasAtomScope to the caller-supplied predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
   // .gpu scope is default and is currently covered by existing
   // atomics w/o explicitly specified scope.
   defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                           regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
   defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                           regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
           NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
           list<Predicate> Preds> {
   // No need to define ".gpu"-scoped atomics.  They do the same thing
   // as the regular, non-scoped atomics defined elsewhere.
   defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                           regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
   defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                           regclass, ImmType, Imm, ImmTy,
                           !listconcat(Preds,[hasAtomScope])>;
}

// atom.add
// NOTE(review): _s32 and _u32 pass the same IntTypeStr ("i"), register class
// and immediate types, so they resolve to the same intrinsic and differ only
// in the asm type suffix -- confirm this duplication is intended. The same
// applies to ATOM2_minmax_impl below.
multiclass ATOM2_add_impl<string OpStr> {
   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
   defm _f32  : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
                            []>;
   defm _f64  : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                            [hasAtomAddF64]>;
}

// atom.{and,or,xor}
// The 64-bit form is additionally guarded by hasAtomBitwise64.
multiclass ATOM2_bitwise_impl<string OpStr> {
   defm _b32  : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   defm _b64  : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                            [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
   defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
// The 64-bit forms are additionally guarded by hasAtomMinMax64.
multiclass ATOM2_minmax_impl<string OpStr> {
   defm _s32  : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
   defm _s64  : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                            [hasAtomMinMax64]>;
   defm _u64  : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                            [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
   defm _u32  : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
   defm _b32  : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
   defm _b64  : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// Instantiate the scoped atomics; the string argument is the OpStr used in
// both the asm mnemonic and the intrinsic name.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One record per address operand kind ($src as Int32Regs, Int64Regs, imemAny,
// MEMri, MEMri64). TyStr supplies the rest of the asm string after
// "ldu.global.". All pattern lists are empty, so no selection pattern is
// generated from these records.
// NOTE(review): presumably they are selected by hand-written code -- confirm.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ldu.global.", TyStr),
                        []>, Requires<[hasLDU]>;
 def avar:  NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
 def ari :  NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ldu.global.", TyStr),
                      []>, Requires<[hasLDU]>;
 def ari64 :  NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ldu.global.", TyStr),
                        []>, Requires<[hasLDU]>;
}

// The 8-bit variant uses Int16Regs as its result register class.
defm INT_PTX_LDU_GLOBAL_i8  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
// Remaining scalar ldu instantiations (64-bit integer, half, packed half,
// float, double, and the two pointer widths).
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
//
// Two-result form. One record per address operand kind; TyStr is appended to
// "ldu.global." to form the asm string. Pattern lists are empty.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

// Four-result form of the multiclass above.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

// v2 instantiations. The 8-bit element variants use Int16Regs.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// v4 instantiations (no 64-bit element types are instantiated here).
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// ld.global.nc is deliberately not marked mayLoad: these loads go through the
// non-coherent texture cache, so the values they read must be read-only for
// the lifetime of the kernel.

// Scalar ldg. One record per address operand kind; TyStr is appended to
// "ld.global.nc." to form the asm string. Pattern lists are empty, and every
// record requires hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg
    : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def areg64
    : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def avar
    : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari
    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
  def ari64
    : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>,
      Requires<[hasLDG]>;
}

// Scalar instantiations. The 8-bit variant uses Int16Regs.
defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
//
// Two-result form; same five address operand kinds as the scalar multiclass,
// but without a Requires clause.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

// Four-result form of the multiclass above.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// v2 ldg instantiations. The 8-bit element variants use Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];",
                  Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];",
                  Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];",
                  Float64Regs>;

// v4 ldg instantiations.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// cvta.<Str>: matches Intrin for 32->32 and 64->64 bit operands. The
// additional _yes_6432 record (only under useShortPtr) takes a 32-bit source,
// widens it with cvt.u64.u32 into a temporary, and applies the 64-bit cvta.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvt.u64.u32 \t%tmp, $src;\n\t"
                # " cvta." # Str # ".u64 \t$result, %tmp; }}",
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// cvta.to.<Str>: mirror image of NG_TO_G. The _yes_3264 record (only under
// useShortPtr) runs the 64-bit cvta.to into a temporary and narrows the
// result with cvt.u32.u64.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                # " cvt.u32.u64 \t$result, %tmp; }}",
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Emitted as a plain register move, in 32- and 64-bit widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsic
// Each record maps one int_nvvm_move_* intrinsic to a mov of matching type.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.  There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// 64-bit move from an imem operand; no selection pattern is attached.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// The asm strings below are PTX comments, so nothing executable is emitted.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins
Int32Regs:$a), 2266 "isspacep.global \t$d, $a;", 2267 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2268def ISSPACEP_GLOBAL_64 2269 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2270 "isspacep.global \t$d, $a;", 2271 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2272def ISSPACEP_LOCAL_32 2273 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2274 "isspacep.local \t$d, $a;", 2275 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2276def ISSPACEP_LOCAL_64 2277 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2278 "isspacep.local \t$d, $a;", 2279 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2280def ISSPACEP_SHARED_32 2281 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2282 "isspacep.shared \t$d, $a;", 2283 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2284def ISSPACEP_SHARED_64 2285 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2286 "isspacep.shared \t$d, $a;", 2287 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2288 2289 2290// Special register reads 2291def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2292 (ins SpecialRegs:$r), 2293 "mov.b32 \t$d, $r;", []>; 2294 2295def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2296def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2297def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2298def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2299def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2300def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2301def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2302def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2303def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2304def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2305def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 2306def 
: Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2307def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2308def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2309def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2310def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2311def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2312def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2313def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2314def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2315def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2316def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2317def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2318def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2319def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2320def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2321def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2322def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2323def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2324def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2325def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2326def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2327 2328 2329// rotate builtin support 2330 2331def ROTATE_B32_HW_IMM 2332 : NVPTXInst<(outs Int32Regs:$dst), 2333 (ins Int32Regs:$src, i32imm:$amt), 2334 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2335 [(set Int32Regs:$dst, 2336 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2337 Requires<[hasHWROT32]> ; 2338 2339def ROTATE_B32_HW_REG 2340 : NVPTXInst<(outs Int32Regs:$dst), 2341 (ins Int32Regs:$src, Int32Regs:$amt), 2342 "shf.l.wrap.b32 
\t$dst, $src, $src, $amt;", 2343 [(set Int32Regs:$dst, 2344 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2345 Requires<[hasHWROT32]> ; 2346 2347def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2348 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2349 Requires<[noHWROT32]> ; 2350 2351def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2352 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2353 Requires<[noHWROT32]> ; 2354 2355let hasSideEffects = false in { 2356 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2357 !strconcat("{{\n\t", 2358 ".reg .b32 %dummy;\n\t", 2359 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2360 "}}"), 2361 []> ; 2362 2363 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2364 !strconcat("{{\n\t", 2365 ".reg .b32 %dummy;\n\t", 2366 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2367 "}}"), 2368 []> ; 2369} 2370 2371let hasSideEffects = false in { 2372 def PACK_TWO_INT32 2373 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2374 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2375} 2376 2377def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2378 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2379 (GET_LO_INT64 Int64Regs:$src))> ; 2380 2381// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2382// no side effects. 
let hasSideEffects = false in {
  // shf.{l,r}.wrap.b32 with separate $lo/$hi data operands; the shift amount
  // is an immediate (_IMM) or a register (_REG). No selection patterns are
  // attached here -- the 64-bit rotate patterns below instantiate them.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;
}

// HW version of rotate 64
// The 64-bit source is split into halves, each result half is produced by one
// 32-bit funnel shift over the two halves, and the results are re-packed.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
// ROT64imm_sw receives the source plus two immediate amounts: the requested
// amount and its complement. For a 64-bit value the complement has to come
// from SUB_FRM_64 (as in the rotate-right pattern below); SUB_FRM_32 is only
// correct for the 32-bit rotate. NOTE(review): ROT64imm_sw and SUB_FRM_64 are
// defined outside this section -- presumably shl/shr amounts and (64 - amt).
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
// also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
//
// Naming: TEX_<geometry>_<result type>_<coordinate type>[_LEVEL|_GRAD].
// Every instruction produces four 32-bit results ($r, $g, $b, $a) and takes
// two 64-bit operands $t and $s followed by the coordinates. None of them has
// a selection pattern ([]).

// 1D: a single coordinate $x.
def TEX_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
              "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
def TEX_1D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
def TEX_1D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], $lod;",
              []>;
def TEX_1D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

// 1D array: an extra 32-bit integer operand $l precedes the coordinate.
def TEX_1D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
def TEX_1D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
def TEX_1D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
def TEX_1D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;
def TEX_1D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
              "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
              "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}];",
              []>;
def TEX_1D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$lod),
              "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], $lod;",
              []>;
def TEX_1D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$gradx, Float32Regs:$grady),
              "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

// 2D: coordinates $x, $y; gradients are two-element vectors.
def TEX_2D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
def TEX_2D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
def TEX_2D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
def TEX_2D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
def TEX_2D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
              "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TEX_2D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$lod),
              "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], $lod;",
              []>;
def TEX_2D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;

// 2D array: $l precedes $x, $y. The printed coordinate vector repeats $y so
// that it has four elements.
def TEX_2D_ARRAY_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
def TEX_2D_ARRAY_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
def TEX_2D_ARRAY_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
def TEX_2D_ARRAY_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;
def TEX_2D_ARRAY_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
                   Int32Regs:$y),
              "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y),
              "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}];",
              []>;
def TEX_2D_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y, Float32Regs:$lod),
              "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              []>;
def TEX_2D_ARRAY_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
                   Float32Regs:$y,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$grady0, Float32Regs:$grady1),
              "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
              "\\{$grady0, $grady1\\};",
              []>;

// 3D: coordinates $x, $y, $z. The coordinate and gradient vectors repeat
// their last element so that each has four elements.
def TEX_3D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
def TEX_3D_F32_F32_GRAD
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
def TEX_3D_S32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
def TEX_3D_S32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
def TEX_3D_U32_S32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
                   Int32Regs:$z),
              "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z),
              "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_3D_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z, Float32Regs:$lod),
              "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
def TEX_3D_U32_F32_GRAD
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
                   Float32Regs:$z,
                   Float32Regs:$gradx0, Float32Regs:$gradx1,
                   Float32Regs:$gradx2, Float32Regs:$grady0,
                   Float32Regs:$grady1, Float32Regs:$grady2),
              "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], "
              "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
              "\\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;

// Cube: float coordinates only, plain and .level forms; $z is repeated to
// fill the fourth coordinate slot.
def TEX_CUBE_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_CUBE_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
def TEX_CUBE_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_CUBE_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
def TEX_CUBE_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}];",
              []>;
def TEX_CUBE_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              []>;

// Cube array: $l plus $x, $y, $z already fills four slots, so nothing is
// repeated.
def TEX_CUBE_ARRAY_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
def TEX_CUBE_ARRAY_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
def TEX_CUBE_ARRAY_S32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
def TEX_CUBE_ARRAY_S32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
def TEX_CUBE_ARRAY_U32_F32
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
              "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;
def TEX_CUBE_ARRAY_U32_F32_LEVEL
  : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
                    Int32Regs:$b, Int32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
                   Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
                   Float32Regs:$lod),
              "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
              "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;

// tld4: one instruction per component selector (.r, .g, .b, ...); results are
// named $v0..$v3.
def TLD4_R_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TLD4_G_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
              "[$t, $s, \\{$x, $y\\}];",
              []>;
def TLD4_B_2D_F32_F32
  : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
                    Float32Regs:$v2, Float32Regs:$v3),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
              "tld4.b.2d.v4.f32.f32 \t\\{$v0, 
$v1, $v2, $v3\\}, " 3080 "[$t, $s, \\{$x, $y\\}];", 3081 []>; 3082def TLD4_A_2D_F32_F32 3083 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3084 Float32Regs:$v2, Float32Regs:$v3), 3085 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3086 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3087 "[$t, $s, \\{$x, $y\\}];", 3088 []>; 3089def TLD4_R_2D_S32_F32 3090 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3091 Int32Regs:$v2, Int32Regs:$v3), 3092 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3093 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3094 "[$t, $s, \\{$x, $y\\}];", 3095 []>; 3096def TLD4_G_2D_S32_F32 3097 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3098 Int32Regs:$v2, Int32Regs:$v3), 3099 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3100 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3101 "[$t, $s, \\{$x, $y\\}];", 3102 []>; 3103def TLD4_B_2D_S32_F32 3104 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3105 Int32Regs:$v2, Int32Regs:$v3), 3106 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3107 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3108 "[$t, $s, \\{$x, $y\\}];", 3109 []>; 3110def TLD4_A_2D_S32_F32 3111 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3112 Int32Regs:$v2, Int32Regs:$v3), 3113 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3114 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3115 "[$t, $s, \\{$x, $y\\}];", 3116 []>; 3117def TLD4_R_2D_U32_F32 3118 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3119 Int32Regs:$v2, Int32Regs:$v3), 3120 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3121 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3122 "[$t, $s, \\{$x, $y\\}];", 3123 []>; 3124def TLD4_G_2D_U32_F32 3125 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3126 Int32Regs:$v2, Int32Regs:$v3), 3127 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3128 "tld4.g.2d.v4.u32.f32 
\t\\{$v0, $v1, $v2, $v3\\}, " 3129 "[$t, $s, \\{$x, $y\\}];", 3130 []>; 3131def TLD4_B_2D_U32_F32 3132 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3133 Int32Regs:$v2, Int32Regs:$v3), 3134 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3135 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3136 "[$t, $s, \\{$x, $y\\}];", 3137 []>; 3138def TLD4_A_2D_U32_F32 3139 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3140 Int32Regs:$v2, Int32Regs:$v3), 3141 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 3142 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3143 "[$t, $s, \\{$x, $y\\}];", 3144 []>; 3145} 3146 3147 3148// texmode_unified 3149let IsTex = true, IsTexModeUnified = true in { 3150// Texture fetch instructions using handles 3151def TEX_UNIFIED_1D_F32_S32 3152 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3153 Float32Regs:$b, Float32Regs:$a), 3154 (ins Int64Regs:$t, Int32Regs:$x), 3155 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3156 []>; 3157def TEX_UNIFIED_1D_F32_F32 3158 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3159 Float32Regs:$b, Float32Regs:$a), 3160 (ins Int64Regs:$t, Float32Regs:$x), 3161 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3162 []>; 3163def TEX_UNIFIED_1D_F32_F32_LEVEL 3164 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3165 Float32Regs:$b, Float32Regs:$a), 3166 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), 3167 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3168 "[$t, \\{$x\\}], $lod;", 3169 []>; 3170def TEX_UNIFIED_1D_F32_F32_GRAD 3171 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3172 Float32Regs:$b, Float32Regs:$a), 3173 (ins Int64Regs:$t, Float32Regs:$x, 3174 Float32Regs:$gradx, Float32Regs:$grady), 3175 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3176 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3177 []>; 3178def TEX_UNIFIED_1D_S32_S32 3179 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3180 Int32Regs:$b, Int32Regs:$a), 3181 
(ins Int64Regs:$t, Int32Regs:$x), 3182 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3183 []>; 3184def TEX_UNIFIED_1D_S32_F32 3185 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3186 Int32Regs:$b, Int32Regs:$a), 3187 (ins Int64Regs:$t, Float32Regs:$x), 3188 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3189 []>; 3190def TEX_UNIFIED_1D_S32_F32_LEVEL 3191 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3192 Int32Regs:$b, Int32Regs:$a), 3193 (ins Int64Regs:$t, Float32Regs:$x, 3194 Float32Regs:$lod), 3195 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3196 "[$t, \\{$x\\}], $lod;", 3197 []>; 3198def TEX_UNIFIED_1D_S32_F32_GRAD 3199 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3200 Int32Regs:$b, Int32Regs:$a), 3201 (ins Int64Regs:$t, Float32Regs:$x, 3202 Float32Regs:$gradx, Float32Regs:$grady), 3203 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3204 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3205 []>; 3206def TEX_UNIFIED_1D_U32_S32 3207 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3208 Int32Regs:$b, Int32Regs:$a), 3209 (ins Int64Regs:$t, Int32Regs:$x), 3210 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3211 []>; 3212def TEX_UNIFIED_1D_U32_F32 3213 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3214 Int32Regs:$b, Int32Regs:$a), 3215 (ins Int64Regs:$t, Float32Regs:$x), 3216 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3217 []>; 3218def TEX_UNIFIED_1D_U32_F32_LEVEL 3219 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3220 Int32Regs:$b, Int32Regs:$a), 3221 (ins Int64Regs:$t, Float32Regs:$x, 3222 Float32Regs:$lod), 3223 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3224 "[$t, \\{$x\\}], $lod;", 3225 []>; 3226def TEX_UNIFIED_1D_U32_F32_GRAD 3227 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3228 Int32Regs:$b, Int32Regs:$a), 3229 (ins Int64Regs:$t, Float32Regs:$x, 3230 Float32Regs:$gradx, Float32Regs:$grady), 3231 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3232 "[$t, \\{$x\\}], \\{$gradx\\}, 
\\{$grady\\};", 3233 []>; 3234 3235def TEX_UNIFIED_1D_ARRAY_F32_S32 3236 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3237 Float32Regs:$b, Float32Regs:$a), 3238 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3239 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3240 "[$t, \\{$l, $x\\}];", 3241 []>; 3242def TEX_UNIFIED_1D_ARRAY_F32_F32 3243 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3244 Float32Regs:$b, Float32Regs:$a), 3245 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3246 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3247 "[$t, \\{$l, $x\\}];", 3248 []>; 3249def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL 3250 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3251 Float32Regs:$b, Float32Regs:$a), 3252 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3253 Float32Regs:$lod), 3254 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3255 "[$t, \\{$l, $x\\}], $lod;", 3256 []>; 3257def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD 3258 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3259 Float32Regs:$b, Float32Regs:$a), 3260 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3261 Float32Regs:$gradx, Float32Regs:$grady), 3262 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3263 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3264 []>; 3265def TEX_UNIFIED_1D_ARRAY_S32_S32 3266 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3267 Int32Regs:$b, Int32Regs:$a), 3268 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3269 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3270 "[$t, \\{$l, $x\\}];", 3271 []>; 3272def TEX_UNIFIED_1D_ARRAY_S32_F32 3273 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3274 Int32Regs:$b, Int32Regs:$a), 3275 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3276 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3277 "[$t, \\{$l, $x\\}];", 3278 []>; 3279def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL 3280 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3281 Int32Regs:$b, Int32Regs:$a), 3282 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3283 Float32Regs:$lod), 3284 
"tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3285 "[$t, \\{$l, $x\\}], $lod;", 3286 []>; 3287def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3288 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3289 Int32Regs:$b, Int32Regs:$a), 3290 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3291 Float32Regs:$gradx, Float32Regs:$grady), 3292 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3293 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3294 []>; 3295def TEX_UNIFIED_1D_ARRAY_U32_S32 3296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3297 Int32Regs:$b, Int32Regs:$a), 3298 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3299 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3300 "[$t, \\{$l, $x\\}];", 3301 []>; 3302def TEX_UNIFIED_1D_ARRAY_U32_F32 3303 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3304 Int32Regs:$b, Int32Regs:$a), 3305 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3306 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3307 "[$t, \\{$l, $x\\}];", 3308 []>; 3309def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3310 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3311 Int32Regs:$b, Int32Regs:$a), 3312 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3313 Float32Regs:$lod), 3314 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3315 "[$t, \\{$l, $x\\}], $lod;", 3316 []>; 3317def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3318 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3319 Int32Regs:$b, Int32Regs:$a), 3320 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3321 Float32Regs:$gradx, Float32Regs:$grady), 3322 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3323 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3324 []>; 3325 3326def TEX_UNIFIED_2D_F32_S32 3327 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3328 Float32Regs:$b, Float32Regs:$a), 3329 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3330 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3331 "[$t, \\{$x, $y\\}];", 3332 []>; 3333def TEX_UNIFIED_2D_F32_F32 3334 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3335 
Float32Regs:$b, Float32Regs:$a), 3336 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3337 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3338 "[$t, \\{$x, $y\\}];", 3339 []>; 3340def TEX_UNIFIED_2D_F32_F32_LEVEL 3341 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3342 Float32Regs:$b, Float32Regs:$a), 3343 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3344 Float32Regs:$lod), 3345 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3346 "[$t, \\{$x, $y\\}], $lod;", 3347 []>; 3348def TEX_UNIFIED_2D_F32_F32_GRAD 3349 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3350 Float32Regs:$b, Float32Regs:$a), 3351 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3352 Float32Regs:$gradx0, Float32Regs:$gradx1, 3353 Float32Regs:$grady0, Float32Regs:$grady1), 3354 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3355 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3356 "\\{$grady0, $grady1\\};", 3357 []>; 3358def TEX_UNIFIED_2D_S32_S32 3359 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3360 Int32Regs:$b, Int32Regs:$a), 3361 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3362 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3363 "[$t, \\{$x, $y\\}];", 3364 []>; 3365def TEX_UNIFIED_2D_S32_F32 3366 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3367 Int32Regs:$b, Int32Regs:$a), 3368 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3369 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3370 "[$t, \\{$x, $y\\}];", 3371 []>; 3372def TEX_UNIFIED_2D_S32_F32_LEVEL 3373 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3374 Int32Regs:$b, Int32Regs:$a), 3375 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3376 Float32Regs:$lod), 3377 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3378 "[$t, \\{$x, $y\\}], $lod;", 3379 []>; 3380def TEX_UNIFIED_2D_S32_F32_GRAD 3381 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3382 Int32Regs:$b, Int32Regs:$a), 3383 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3384 Float32Regs:$gradx0, Float32Regs:$gradx1, 3385 Float32Regs:$grady0, 
Float32Regs:$grady1), 3386 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3387 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3388 "\\{$grady0, $grady1\\};", 3389 []>; 3390def TEX_UNIFIED_2D_U32_S32 3391 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3392 Int32Regs:$b, Int32Regs:$a), 3393 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3394 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3395 "[$t, \\{$x, $y\\}];", 3396 []>; 3397def TEX_UNIFIED_2D_U32_F32 3398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3399 Int32Regs:$b, Int32Regs:$a), 3400 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3401 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3402 "[$t, \\{$x, $y\\}];", 3403 []>; 3404def TEX_UNIFIED_2D_U32_F32_LEVEL 3405 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3406 Int32Regs:$b, Int32Regs:$a), 3407 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3408 Float32Regs:$lod), 3409 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3410 "[$t, \\{$x, $y\\}], $lod;", 3411 []>; 3412def TEX_UNIFIED_2D_U32_F32_GRAD 3413 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3414 Int32Regs:$b, Int32Regs:$a), 3415 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3416 Float32Regs:$gradx0, Float32Regs:$gradx1, 3417 Float32Regs:$grady0, Float32Regs:$grady1), 3418 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3419 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3420 "\\{$grady0, $grady1\\};", 3421 []>; 3422 3423def TEX_UNIFIED_2D_ARRAY_F32_S32 3424 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3425 Float32Regs:$b, Float32Regs:$a), 3426 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3427 Int32Regs:$y), 3428 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3429 "[$t, \\{$l, $x, $y, $y\\}];", 3430 []>; 3431def TEX_UNIFIED_2D_ARRAY_F32_F32 3432 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3433 Float32Regs:$b, Float32Regs:$a), 3434 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3435 Float32Regs:$y), 3436 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3437 "[$t, \\{$l, $x, $y, 
$y\\}];", 3438 []>; 3439def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL 3440 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3441 Float32Regs:$b, Float32Regs:$a), 3442 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3443 Float32Regs:$y, Float32Regs:$lod), 3444 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3445 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3446 []>; 3447def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD 3448 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3449 Float32Regs:$b, Float32Regs:$a), 3450 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3451 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 3452 Float32Regs:$grady0, Float32Regs:$grady1), 3453 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3454 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3455 "\\{$grady0, $grady1\\};", 3456 []>; 3457def TEX_UNIFIED_2D_ARRAY_S32_S32 3458 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3459 Int32Regs:$b, Int32Regs:$a), 3460 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3461 Int32Regs:$y), 3462 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3463 "[$t, \\{$l, $x, $y, $y\\}];", 3464 []>; 3465def TEX_UNIFIED_2D_ARRAY_S32_F32 3466 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3467 Int32Regs:$b, Int32Regs:$a), 3468 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3469 Float32Regs:$y), 3470 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3471 "[$t, \\{$l, $x, $y, $y\\}];", 3472 []>; 3473def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL 3474 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3475 Int32Regs:$b, Int32Regs:$a), 3476 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3477 Float32Regs:$y, Float32Regs:$lod), 3478 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3479 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3480 []>; 3481def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD 3482 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3483 Int32Regs:$b, Int32Regs:$a), 3484 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3485 Float32Regs:$y, 3486 Float32Regs:$gradx0, Float32Regs:$gradx1, 3487 
Float32Regs:$grady0, Float32Regs:$grady1), 3488 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3489 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3490 "\\{$grady0, $grady1\\};", 3491 []>; 3492def TEX_UNIFIED_2D_ARRAY_U32_S32 3493 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3494 Int32Regs:$b, Int32Regs:$a), 3495 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3496 Int32Regs:$y), 3497 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3498 "[$t, \\{$l, $x, $y, $y\\}];", 3499 []>; 3500def TEX_UNIFIED_2D_ARRAY_U32_F32 3501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3502 Int32Regs:$b, Int32Regs:$a), 3503 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3504 Float32Regs:$y), 3505 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3506 "[$t, \\{$l, $x, $y, $y\\}];", 3507 []>; 3508def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 3509 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3510 Int32Regs:$b, Int32Regs:$a), 3511 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3512 Float32Regs:$y, Float32Regs:$lod), 3513 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3514 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3515 []>; 3516def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 3517 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3518 Int32Regs:$b, Int32Regs:$a), 3519 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3520 Float32Regs:$y, 3521 Float32Regs:$gradx0, Float32Regs:$gradx1, 3522 Float32Regs:$grady0, Float32Regs:$grady1), 3523 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3524 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3525 "\\{$grady0, $grady1\\};", 3526 []>; 3527 3528def TEX_UNIFIED_3D_F32_S32 3529 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3530 Float32Regs:$b, Float32Regs:$a), 3531 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3532 Int32Regs:$z), 3533 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3534 "[$t, \\{$x, $y, $z, $z\\}];", 3535 []>; 3536def TEX_UNIFIED_3D_F32_F32 3537 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3538 Float32Regs:$b, Float32Regs:$a), 3539 
(ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3540 Float32Regs:$z), 3541 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3542 "[$t, \\{$x, $y, $z, $z\\}];", 3543 []>; 3544def TEX_UNIFIED_3D_F32_F32_LEVEL 3545 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3546 Float32Regs:$b, Float32Regs:$a), 3547 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3548 Float32Regs:$z, Float32Regs:$lod), 3549 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3550 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3551 []>; 3552def TEX_UNIFIED_3D_F32_F32_GRAD 3553 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3554 Float32Regs:$b, Float32Regs:$a), 3555 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3556 Float32Regs:$z, 3557 Float32Regs:$gradx0, Float32Regs:$gradx1, 3558 Float32Regs:$gradx2, Float32Regs:$grady0, 3559 Float32Regs:$grady1, Float32Regs:$grady2), 3560 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3561 "[$t, \\{$x, $y, $z, $z\\}], " 3562 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3563 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3564 []>; 3565def TEX_UNIFIED_3D_S32_S32 3566 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3567 Int32Regs:$b, Int32Regs:$a), 3568 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3569 Int32Regs:$z), 3570 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3571 "[$t, \\{$x, $y, $z, $z\\}];", 3572 []>; 3573def TEX_UNIFIED_3D_S32_F32 3574 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3575 Int32Regs:$b, Int32Regs:$a), 3576 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3577 Float32Regs:$z), 3578 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3579 "[$t, \\{$x, $y, $z, $z\\}];", 3580 []>; 3581def TEX_UNIFIED_3D_S32_F32_LEVEL 3582 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3583 Int32Regs:$b, Int32Regs:$a), 3584 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3585 Float32Regs:$z, Float32Regs:$lod), 3586 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3587 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3588 []>; 3589def TEX_UNIFIED_3D_S32_F32_GRAD 3590 : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3591 Int32Regs:$b, Int32Regs:$a), 3592 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3593 Float32Regs:$z, 3594 Float32Regs:$gradx0, Float32Regs:$gradx1, 3595 Float32Regs:$gradx2, Float32Regs:$grady0, 3596 Float32Regs:$grady1, Float32Regs:$grady2), 3597 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3598 "[$t, \\{$x, $y, $z, $z\\}], " 3599 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3600 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3601 []>; 3602def TEX_UNIFIED_3D_U32_S32 3603 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3604 Int32Regs:$b, Int32Regs:$a), 3605 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3606 Int32Regs:$z), 3607 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3608 "[$t, \\{$x, $y, $z, $z\\}];", 3609 []>; 3610def TEX_UNIFIED_3D_U32_F32 3611 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3612 Int32Regs:$b, Int32Regs:$a), 3613 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3614 Float32Regs:$z), 3615 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3616 "[$t, \\{$x, $y, $z, $z\\}];", 3617 []>; 3618def TEX_UNIFIED_3D_U32_F32_LEVEL 3619 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3620 Int32Regs:$b, Int32Regs:$a), 3621 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3622 Float32Regs:$z, Float32Regs:$lod), 3623 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3624 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3625 []>; 3626def TEX_UNIFIED_3D_U32_F32_GRAD 3627 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3628 Int32Regs:$b, Int32Regs:$a), 3629 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3630 Float32Regs:$z, 3631 Float32Regs:$gradx0, Float32Regs:$gradx1, 3632 Float32Regs:$gradx2, Float32Regs:$grady0, 3633 Float32Regs:$grady1, Float32Regs:$grady2), 3634 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3635 "[$t, \\{$x, $y, $z, $z\\}], " 3636 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3637 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3638 []>; 3639 3640def TEX_UNIFIED_CUBE_F32_F32 3641 : NVPTXInst<(outs 
Float32Regs:$r, Float32Regs:$g, 3642 Float32Regs:$b, Float32Regs:$a), 3643 (ins Int64Regs:$t, 3644 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3645 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3646 "[$t, \\{$x, $y, $z, $z\\}];", 3647 []>; 3648def TEX_UNIFIED_CUBE_F32_F32_LEVEL 3649 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3650 Float32Regs:$b, Float32Regs:$a), 3651 (ins Int64Regs:$t, 3652 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3653 Float32Regs:$lod), 3654 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3655 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3656 []>; 3657def TEX_UNIFIED_CUBE_S32_F32 3658 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3659 Int32Regs:$b, Int32Regs:$a), 3660 (ins Int64Regs:$t, 3661 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3662 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3663 "[$t, \\{$x, $y, $z, $z\\}];", 3664 []>; 3665def TEX_UNIFIED_CUBE_S32_F32_LEVEL 3666 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3667 Int32Regs:$b, Int32Regs:$a), 3668 (ins Int64Regs:$t, 3669 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3670 Float32Regs:$lod), 3671 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3672 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3673 []>; 3674def TEX_UNIFIED_CUBE_U32_F32 3675 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3676 Int32Regs:$b, Int32Regs:$a), 3677 (ins Int64Regs:$t, 3678 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3679 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3680 "[$t, \\{$x, $y, $z, $z\\}];", 3681 []>; 3682def TEX_UNIFIED_CUBE_U32_F32_LEVEL 3683 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3684 Int32Regs:$b, Int32Regs:$a), 3685 (ins Int64Regs:$t, 3686 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3687 Float32Regs:$lod), 3688 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3689 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3690 []>; 3691 3692def TEX_UNIFIED_CUBE_ARRAY_F32_F32 3693 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3694 Float32Regs:$b, Float32Regs:$a), 3695 (ins 
Int64Regs:$t, Int32Regs:$l, 3696 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3697 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3698 "[$t, \\{$l, $x, $y, $z\\}];", 3699 []>; 3700def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3701 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3702 Float32Regs:$b, Float32Regs:$a), 3703 (ins Int64Regs:$t, Int32Regs:$l, 3704 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3705 Float32Regs:$lod), 3706 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3707 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3708 []>; 3709def TEX_UNIFIED_CUBE_ARRAY_S32_F32 3710 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3711 Int32Regs:$b, Int32Regs:$a), 3712 (ins Int64Regs:$t, Int32Regs:$l, 3713 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3714 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3715 "[$t, \\{$l, $x, $y, $z\\}];", 3716 []>; 3717def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3718 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3719 Int32Regs:$b, Int32Regs:$a), 3720 (ins Int64Regs:$t, Int32Regs:$l, 3721 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3722 Float32Regs:$lod), 3723 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3724 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3725 []>; 3726def TEX_UNIFIED_CUBE_ARRAY_U32_F32 3727 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3728 Int32Regs:$b, Int32Regs:$a), 3729 (ins Int64Regs:$t, Int32Regs:$l, 3730 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3731 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3732 "[$t, \\{$l, $x, $y, $z\\}];", 3733 []>; 3734def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3735 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3736 Int32Regs:$b, Int32Regs:$a), 3737 (ins Int64Regs:$t, Int32Regs:$l, 3738 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3739 Float32Regs:$lod), 3740 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3741 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3742 []>; 3743 3744def TLD4_UNIFIED_R_2D_F32_F32 3745 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3746 
Float32Regs:$v2, Float32Regs:$v3), 3747 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3748 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3749 "[$t, \\{$x, $y\\}];", 3750 []>; 3751def TLD4_UNIFIED_G_2D_F32_F32 3752 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3753 Float32Regs:$v2, Float32Regs:$v3), 3754 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3755 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3756 "[$t, \\{$x, $y\\}];", 3757 []>; 3758def TLD4_UNIFIED_B_2D_F32_F32 3759 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3760 Float32Regs:$v2, Float32Regs:$v3), 3761 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3762 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3763 "[$t, \\{$x, $y\\}];", 3764 []>; 3765def TLD4_UNIFIED_A_2D_F32_F32 3766 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3767 Float32Regs:$v2, Float32Regs:$v3), 3768 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3769 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3770 "[$t, \\{$x, $y\\}];", 3771 []>; 3772def TLD4_UNIFIED_R_2D_S32_F32 3773 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3774 Int32Regs:$v2, Int32Regs:$v3), 3775 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3776 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3777 "[$t, \\{$x, $y\\}];", 3778 []>; 3779def TLD4_UNIFIED_G_2D_S32_F32 3780 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3781 Int32Regs:$v2, Int32Regs:$v3), 3782 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3783 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3784 "[$t, \\{$x, $y\\}];", 3785 []>; 3786def TLD4_UNIFIED_B_2D_S32_F32 3787 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3788 Int32Regs:$v2, Int32Regs:$v3), 3789 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3790 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3791 "[$t, \\{$x, $y\\}];", 3792 []>; 3793def TLD4_UNIFIED_A_2D_S32_F32 3794 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3795 Int32Regs:$v2, Int32Regs:$v3), 3796 (ins 
Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3797 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3798 "[$t, \\{$x, $y\\}];", 3799 []>; 3800def TLD4_UNIFIED_R_2D_U32_F32 3801 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3802 Int32Regs:$v2, Int32Regs:$v3), 3803 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3804 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3805 "[$t, \\{$x, $y\\}];", 3806 []>; 3807def TLD4_UNIFIED_G_2D_U32_F32 3808 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3809 Int32Regs:$v2, Int32Regs:$v3), 3810 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3811 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3812 "[$t, \\{$x, $y\\}];", 3813 []>; 3814def TLD4_UNIFIED_B_2D_U32_F32 3815 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3816 Int32Regs:$v2, Int32Regs:$v3), 3817 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3818 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3819 "[$t, \\{$x, $y\\}];", 3820 []>; 3821def TLD4_UNIFIED_A_2D_U32_F32 3822 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3823 Int32Regs:$v2, Int32Regs:$v3), 3824 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3825 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3826 "[$t, \\{$x, $y\\}];", 3827 []>; 3828} 3829 3830 3831 3832//=== Surface load instructions 3833// .clamp variant 3834let IsSuld = true in { 3835def SULD_1D_I8_CLAMP 3836 : NVPTXInst<(outs Int16Regs:$r), 3837 (ins Int64Regs:$s, Int32Regs:$x), 3838 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", 3839 []>; 3840def SULD_1D_I16_CLAMP 3841 : NVPTXInst<(outs Int16Regs:$r), 3842 (ins Int64Regs:$s, Int32Regs:$x), 3843 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", 3844 []>; 3845def SULD_1D_I32_CLAMP 3846 : NVPTXInst<(outs Int32Regs:$r), 3847 (ins Int64Regs:$s, Int32Regs:$x), 3848 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", 3849 []>; 3850def SULD_1D_I64_CLAMP 3851 : NVPTXInst<(outs Int64Regs:$r), 3852 (ins Int64Regs:$s, Int32Regs:$x), 3853 "suld.b.1d.b64.clamp \\{$r\\}, [$s, 
\\{$x\\}];", 3854 []>; 3855 3856def SULD_1D_ARRAY_I8_CLAMP 3857 : NVPTXInst<(outs Int16Regs:$r), 3858 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3859 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3860 []>; 3861def SULD_1D_ARRAY_I16_CLAMP 3862 : NVPTXInst<(outs Int16Regs:$r), 3863 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3864 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3865 []>; 3866def SULD_1D_ARRAY_I32_CLAMP 3867 : NVPTXInst<(outs Int32Regs:$r), 3868 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3869 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3870 []>; 3871def SULD_1D_ARRAY_I64_CLAMP 3872 : NVPTXInst<(outs Int64Regs:$r), 3873 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3874 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3875 []>; 3876 3877def SULD_2D_I8_CLAMP 3878 : NVPTXInst<(outs Int16Regs:$r), 3879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3880 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3881 []>; 3882def SULD_2D_I16_CLAMP 3883 : NVPTXInst<(outs Int16Regs:$r), 3884 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3885 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3886 []>; 3887def SULD_2D_I32_CLAMP 3888 : NVPTXInst<(outs Int32Regs:$r), 3889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3890 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3891 []>; 3892def SULD_2D_I64_CLAMP 3893 : NVPTXInst<(outs Int64Regs:$r), 3894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3895 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3896 []>; 3897 3898def SULD_2D_ARRAY_I8_CLAMP 3899 : NVPTXInst<(outs Int16Regs:$r), 3900 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3901 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3902 []>; 3903def SULD_2D_ARRAY_I16_CLAMP 3904 : NVPTXInst<(outs Int16Regs:$r), 3905 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3906 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3907 []>; 3908def SULD_2D_ARRAY_I32_CLAMP 
3909 : NVPTXInst<(outs Int32Regs:$r), 3910 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3911 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3912 []>; 3913def SULD_2D_ARRAY_I64_CLAMP 3914 : NVPTXInst<(outs Int64Regs:$r), 3915 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3916 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3917 []>; 3918 3919def SULD_3D_I8_CLAMP 3920 : NVPTXInst<(outs Int16Regs:$r), 3921 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3922 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3923 []>; 3924def SULD_3D_I16_CLAMP 3925 : NVPTXInst<(outs Int16Regs:$r), 3926 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3927 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3928 []>; 3929def SULD_3D_I32_CLAMP 3930 : NVPTXInst<(outs Int32Regs:$r), 3931 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3932 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3933 []>; 3934def SULD_3D_I64_CLAMP 3935 : NVPTXInst<(outs Int64Regs:$r), 3936 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3937 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3938 []>; 3939} 3940 3941let IsSuld = 2 in { 3942def SULD_1D_V2I8_CLAMP 3943 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3944 (ins Int64Regs:$s, Int32Regs:$x), 3945 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3946 []>; 3947def SULD_1D_V2I16_CLAMP 3948 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3949 (ins Int64Regs:$s, Int32Regs:$x), 3950 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3951 []>; 3952def SULD_1D_V2I32_CLAMP 3953 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3954 (ins Int64Regs:$s, Int32Regs:$x), 3955 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3956 []>; 3957def SULD_1D_V2I64_CLAMP 3958 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3959 (ins Int64Regs:$s, Int32Regs:$x), 3960 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, 
\\{$x\\}];", 3961 []>; 3962 3963def SULD_1D_ARRAY_V2I8_CLAMP 3964 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3965 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3966 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3967 []>; 3968def SULD_1D_ARRAY_V2I16_CLAMP 3969 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3970 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3971 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3972 []>; 3973def SULD_1D_ARRAY_V2I32_CLAMP 3974 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3975 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3976 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3977 []>; 3978def SULD_1D_ARRAY_V2I64_CLAMP 3979 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3980 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3981 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3982 []>; 3983 3984def SULD_2D_V2I8_CLAMP 3985 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3986 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3987 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3988 []>; 3989def SULD_2D_V2I16_CLAMP 3990 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3991 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3992 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3993 []>; 3994def SULD_2D_V2I32_CLAMP 3995 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3996 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3997 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3998 []>; 3999def SULD_2D_V2I64_CLAMP 4000 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4001 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4002 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4003 []>; 4004 4005def SULD_2D_ARRAY_V2I8_CLAMP 4006 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4007 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4008 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " 4009 "[$s, \\{$l, $x, $y, $y\\}];", 4010 []>; 4011def 
SULD_2D_ARRAY_V2I16_CLAMP 4012 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4013 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4014 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " 4015 "[$s, \\{$l, $x, $y, $y\\}];", 4016 []>; 4017def SULD_2D_ARRAY_V2I32_CLAMP 4018 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4019 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4020 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " 4021 "[$s, \\{$l, $x, $y, $y\\}];", 4022 []>; 4023def SULD_2D_ARRAY_V2I64_CLAMP 4024 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4025 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4026 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " 4027 "[$s, \\{$l, $x, $y, $y\\}];", 4028 []>; 4029 4030def SULD_3D_V2I8_CLAMP 4031 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4032 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4033 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4034 []>; 4035def SULD_3D_V2I16_CLAMP 4036 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4037 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4038 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4039 []>; 4040def SULD_3D_V2I32_CLAMP 4041 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4042 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4043 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4044 []>; 4045def SULD_3D_V2I64_CLAMP 4046 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4047 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4048 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4049 []>; 4050} 4051 4052let IsSuld = 3 in { 4053def SULD_1D_V4I8_CLAMP 4054 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4055 (ins Int64Regs:$s, Int32Regs:$x), 4056 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4057 []>; 4058def SULD_1D_V4I16_CLAMP 4059 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, 
Int16Regs:$b, Int16Regs:$a), 4060 (ins Int64Regs:$s, Int32Regs:$x), 4061 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4062 []>; 4063def SULD_1D_V4I32_CLAMP 4064 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4065 (ins Int64Regs:$s, Int32Regs:$x), 4066 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4067 []>; 4068 4069def SULD_1D_ARRAY_V4I8_CLAMP 4070 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4071 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4072 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4073 "[$s, \\{$l, $x\\}];", 4074 []>; 4075def SULD_1D_ARRAY_V4I16_CLAMP 4076 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4077 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4078 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4079 "[$s, \\{$l, $x\\}];", 4080 []>; 4081def SULD_1D_ARRAY_V4I32_CLAMP 4082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4083 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4084 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4085 "[$s, \\{$l, $x\\}];", 4086 []>; 4087 4088def SULD_2D_V4I8_CLAMP 4089 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4090 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4091 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4092 []>; 4093def SULD_2D_V4I16_CLAMP 4094 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4095 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4096 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4097 []>; 4098def SULD_2D_V4I32_CLAMP 4099 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4100 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4101 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4102 []>; 4103 4104def SULD_2D_ARRAY_V4I8_CLAMP 4105 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), 4106 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4107 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4108 "[$s, \\{$l, $x, $y, $y\\}];", 4109 []>; 4110def SULD_2D_ARRAY_V4I16_CLAMP 4111 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4112 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4113 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4114 "[$s, \\{$l, $x, $y, $y\\}];", 4115 []>; 4116def SULD_2D_ARRAY_V4I32_CLAMP 4117 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4118 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4119 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4120 "[$s, \\{$l, $x, $y, $y\\}];", 4121 []>; 4122 4123 4124def SULD_3D_V4I8_CLAMP 4125 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4126 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4127 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 4128 "[$s, \\{$x, $y, $z, $z\\}];", 4129 []>; 4130def SULD_3D_V4I16_CLAMP 4131 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4132 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4133 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 4134 "[$s, \\{$x, $y, $z, $z\\}];", 4135 []>; 4136def SULD_3D_V4I32_CLAMP 4137 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4138 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4139 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 4140 "[$s, \\{$x, $y, $z, $z\\}];", 4141 []>; 4142} 4143 4144 4145// .trap variant 4146let IsSuld = true in { 4147def SULD_1D_I8_TRAP 4148 : NVPTXInst<(outs Int16Regs:$r), 4149 (ins Int64Regs:$s, Int32Regs:$x), 4150 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", 4151 []>; 4152def SULD_1D_I16_TRAP 4153 : NVPTXInst<(outs Int16Regs:$r), 4154 (ins Int64Regs:$s, Int32Regs:$x), 4155 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", 4156 []>; 4157def SULD_1D_I32_TRAP 4158 : 
NVPTXInst<(outs Int32Regs:$r), 4159 (ins Int64Regs:$s, Int32Regs:$x), 4160 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", 4161 []>; 4162def SULD_1D_I64_TRAP 4163 : NVPTXInst<(outs Int64Regs:$r), 4164 (ins Int64Regs:$s, Int32Regs:$x), 4165 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", 4166 []>; 4167 4168def SULD_1D_ARRAY_I8_TRAP 4169 : NVPTXInst<(outs Int16Regs:$r), 4170 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4171 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4172 []>; 4173def SULD_1D_ARRAY_I16_TRAP 4174 : NVPTXInst<(outs Int16Regs:$r), 4175 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4176 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4177 []>; 4178def SULD_1D_ARRAY_I32_TRAP 4179 : NVPTXInst<(outs Int32Regs:$r), 4180 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4181 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4182 []>; 4183def SULD_1D_ARRAY_I64_TRAP 4184 : NVPTXInst<(outs Int64Regs:$r), 4185 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4186 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4187 []>; 4188 4189def SULD_2D_I8_TRAP 4190 : NVPTXInst<(outs Int16Regs:$r), 4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4192 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4193 []>; 4194def SULD_2D_I16_TRAP 4195 : NVPTXInst<(outs Int16Regs:$r), 4196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4197 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4198 []>; 4199def SULD_2D_I32_TRAP 4200 : NVPTXInst<(outs Int32Regs:$r), 4201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4202 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4203 []>; 4204def SULD_2D_I64_TRAP 4205 : NVPTXInst<(outs Int64Regs:$r), 4206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4207 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4208 []>; 4209 4210def SULD_2D_ARRAY_I8_TRAP 4211 : NVPTXInst<(outs Int16Regs:$r), 4212 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4213 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, 
$y\\}];", 4214 []>; 4215def SULD_2D_ARRAY_I16_TRAP 4216 : NVPTXInst<(outs Int16Regs:$r), 4217 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4218 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4219 []>; 4220def SULD_2D_ARRAY_I32_TRAP 4221 : NVPTXInst<(outs Int32Regs:$r), 4222 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4223 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4224 []>; 4225def SULD_2D_ARRAY_I64_TRAP 4226 : NVPTXInst<(outs Int64Regs:$r), 4227 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4228 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4229 []>; 4230 4231def SULD_3D_I8_TRAP 4232 : NVPTXInst<(outs Int16Regs:$r), 4233 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4234 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4235 []>; 4236def SULD_3D_I16_TRAP 4237 : NVPTXInst<(outs Int16Regs:$r), 4238 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4239 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4240 []>; 4241def SULD_3D_I32_TRAP 4242 : NVPTXInst<(outs Int32Regs:$r), 4243 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4244 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4245 []>; 4246def SULD_3D_I64_TRAP 4247 : NVPTXInst<(outs Int64Regs:$r), 4248 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4249 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4250 []>; 4251} 4252 4253let IsSuld = 2 in { 4254def SULD_1D_V2I8_TRAP 4255 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4256 (ins Int64Regs:$s, Int32Regs:$x), 4257 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4258 []>; 4259def SULD_1D_V2I16_TRAP 4260 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4261 (ins Int64Regs:$s, Int32Regs:$x), 4262 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4263 []>; 4264def SULD_1D_V2I32_TRAP 4265 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4266 (ins Int64Regs:$s, 
Int32Regs:$x), 4267 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4268 []>; 4269def SULD_1D_V2I64_TRAP 4270 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4271 (ins Int64Regs:$s, Int32Regs:$x), 4272 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4273 []>; 4274 4275def SULD_1D_ARRAY_V2I8_TRAP 4276 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4277 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4278 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4279 []>; 4280def SULD_1D_ARRAY_V2I16_TRAP 4281 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4282 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4283 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4284 []>; 4285def SULD_1D_ARRAY_V2I32_TRAP 4286 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4287 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4288 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4289 []>; 4290def SULD_1D_ARRAY_V2I64_TRAP 4291 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4292 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4293 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4294 []>; 4295 4296def SULD_2D_V2I8_TRAP 4297 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4298 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4299 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4300 []>; 4301def SULD_2D_V2I16_TRAP 4302 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4303 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4304 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4305 []>; 4306def SULD_2D_V2I32_TRAP 4307 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4308 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4309 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4310 []>; 4311def SULD_2D_V2I64_TRAP 4312 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4313 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4314 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4315 []>; 4316 4317def SULD_2D_ARRAY_V2I8_TRAP 4318 : 
NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4319 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4320 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " 4321 "[$s, \\{$l, $x, $y, $y\\}];", 4322 []>; 4323def SULD_2D_ARRAY_V2I16_TRAP 4324 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4325 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4326 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " 4327 "[$s, \\{$l, $x, $y, $y\\}];", 4328 []>; 4329def SULD_2D_ARRAY_V2I32_TRAP 4330 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4331 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4332 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " 4333 "[$s, \\{$l, $x, $y, $y\\}];", 4334 []>; 4335def SULD_2D_ARRAY_V2I64_TRAP 4336 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4337 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4338 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " 4339 "[$s, \\{$l, $x, $y, $y\\}];", 4340 []>; 4341 4342def SULD_3D_V2I8_TRAP 4343 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4345 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4346 []>; 4347def SULD_3D_V2I16_TRAP 4348 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4349 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4350 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4351 []>; 4352def SULD_3D_V2I32_TRAP 4353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4354 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4355 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4356 []>; 4357def SULD_3D_V2I64_TRAP 4358 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4359 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4360 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4361 []>; 4362} 4363 4364let IsSuld = 3 in { 4365def SULD_1D_V4I8_TRAP 4366 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4367 (ins 
Int64Regs:$s, Int32Regs:$x), 4368 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4369 []>; 4370def SULD_1D_V4I16_TRAP 4371 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4372 (ins Int64Regs:$s, Int32Regs:$x), 4373 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4374 []>; 4375def SULD_1D_V4I32_TRAP 4376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4377 (ins Int64Regs:$s, Int32Regs:$x), 4378 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4379 []>; 4380 4381def SULD_1D_ARRAY_V4I8_TRAP 4382 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4383 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4384 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4385 "[$s, \\{$l, $x\\}];", 4386 []>; 4387def SULD_1D_ARRAY_V4I16_TRAP 4388 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4389 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4390 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4391 "[$s, \\{$l, $x\\}];", 4392 []>; 4393def SULD_1D_ARRAY_V4I32_TRAP 4394 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4395 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4396 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4397 "[$s, \\{$l, $x\\}];", 4398 []>; 4399 4400def SULD_2D_V4I8_TRAP 4401 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4402 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4403 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4404 []>; 4405def SULD_2D_V4I16_TRAP 4406 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4407 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4408 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4409 []>; 4410def SULD_2D_V4I32_TRAP 4411 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4412 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4413 
"suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4414 []>; 4415 4416def SULD_2D_ARRAY_V4I8_TRAP 4417 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4418 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4419 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4420 "[$s, \\{$l, $x, $y, $y\\}];", 4421 []>; 4422def SULD_2D_ARRAY_V4I16_TRAP 4423 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4424 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4425 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4426 "[$s, \\{$l, $x, $y, $y\\}];", 4427 []>; 4428def SULD_2D_ARRAY_V4I32_TRAP 4429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4430 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4431 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4432 "[$s, \\{$l, $x, $y, $y\\}];", 4433 []>; 4434 4435 4436def SULD_3D_V4I8_TRAP 4437 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4438 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4439 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4440 "[$s, \\{$x, $y, $z, $z\\}];", 4441 []>; 4442def SULD_3D_V4I16_TRAP 4443 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4445 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4446 "[$s, \\{$x, $y, $z, $z\\}];", 4447 []>; 4448def SULD_3D_V4I32_TRAP 4449 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4450 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4451 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4452 "[$s, \\{$x, $y, $z, $z\\}];", 4453 []>; 4454} 4455 4456// .zero variant 4457let IsSuld = true in { 4458def SULD_1D_I8_ZERO 4459 : NVPTXInst<(outs Int16Regs:$r), 4460 (ins Int64Regs:$s, Int32Regs:$x), 4461 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", 4462 []>; 4463def SULD_1D_I16_ZERO 4464 : 
NVPTXInst<(outs Int16Regs:$r), 4465 (ins Int64Regs:$s, Int32Regs:$x), 4466 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", 4467 []>; 4468def SULD_1D_I32_ZERO 4469 : NVPTXInst<(outs Int32Regs:$r), 4470 (ins Int64Regs:$s, Int32Regs:$x), 4471 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", 4472 []>; 4473def SULD_1D_I64_ZERO 4474 : NVPTXInst<(outs Int64Regs:$r), 4475 (ins Int64Regs:$s, Int32Regs:$x), 4476 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", 4477 []>; 4478 4479def SULD_1D_ARRAY_I8_ZERO 4480 : NVPTXInst<(outs Int16Regs:$r), 4481 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4482 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4483 []>; 4484def SULD_1D_ARRAY_I16_ZERO 4485 : NVPTXInst<(outs Int16Regs:$r), 4486 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4487 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4488 []>; 4489def SULD_1D_ARRAY_I32_ZERO 4490 : NVPTXInst<(outs Int32Regs:$r), 4491 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4492 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4493 []>; 4494def SULD_1D_ARRAY_I64_ZERO 4495 : NVPTXInst<(outs Int64Regs:$r), 4496 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4497 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4498 []>; 4499 4500def SULD_2D_I8_ZERO 4501 : NVPTXInst<(outs Int16Regs:$r), 4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4503 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4504 []>; 4505def SULD_2D_I16_ZERO 4506 : NVPTXInst<(outs Int16Regs:$r), 4507 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4508 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4509 []>; 4510def SULD_2D_I32_ZERO 4511 : NVPTXInst<(outs Int32Regs:$r), 4512 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4513 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4514 []>; 4515def SULD_2D_I64_ZERO 4516 : NVPTXInst<(outs Int64Regs:$r), 4517 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4518 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4519 []>; 4520 4521def SULD_2D_ARRAY_I8_ZERO 
4522 : NVPTXInst<(outs Int16Regs:$r), 4523 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4524 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4525 []>; 4526def SULD_2D_ARRAY_I16_ZERO 4527 : NVPTXInst<(outs Int16Regs:$r), 4528 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4529 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4530 []>; 4531def SULD_2D_ARRAY_I32_ZERO 4532 : NVPTXInst<(outs Int32Regs:$r), 4533 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4534 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4535 []>; 4536def SULD_2D_ARRAY_I64_ZERO 4537 : NVPTXInst<(outs Int64Regs:$r), 4538 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4539 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4540 []>; 4541 4542def SULD_3D_I8_ZERO 4543 : NVPTXInst<(outs Int16Regs:$r), 4544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4545 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4546 []>; 4547def SULD_3D_I16_ZERO 4548 : NVPTXInst<(outs Int16Regs:$r), 4549 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4550 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4551 []>; 4552def SULD_3D_I32_ZERO 4553 : NVPTXInst<(outs Int32Regs:$r), 4554 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4555 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4556 []>; 4557def SULD_3D_I64_ZERO 4558 : NVPTXInst<(outs Int64Regs:$r), 4559 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4560 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4561 []>; 4562} 4563 4564let IsSuld = 2 in { 4565def SULD_1D_V2I8_ZERO 4566 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4567 (ins Int64Regs:$s, Int32Regs:$x), 4568 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4569 []>; 4570def SULD_1D_V2I16_ZERO 4571 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4572 (ins Int64Regs:$s, Int32Regs:$x), 4573 
"suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4574 []>; 4575def SULD_1D_V2I32_ZERO 4576 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4577 (ins Int64Regs:$s, Int32Regs:$x), 4578 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4579 []>; 4580def SULD_1D_V2I64_ZERO 4581 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4582 (ins Int64Regs:$s, Int32Regs:$x), 4583 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4584 []>; 4585 4586def SULD_1D_ARRAY_V2I8_ZERO 4587 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4588 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4589 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4590 []>; 4591def SULD_1D_ARRAY_V2I16_ZERO 4592 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4593 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4594 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4595 []>; 4596def SULD_1D_ARRAY_V2I32_ZERO 4597 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4598 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4599 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4600 []>; 4601def SULD_1D_ARRAY_V2I64_ZERO 4602 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4603 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4604 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4605 []>; 4606 4607def SULD_2D_V2I8_ZERO 4608 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4609 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4610 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4611 []>; 4612def SULD_2D_V2I16_ZERO 4613 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4614 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4615 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4616 []>; 4617def SULD_2D_V2I32_ZERO 4618 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4619 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4620 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4621 []>; 4622def SULD_2D_V2I64_ZERO 4623 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 
4624 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4625 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4626 []>; 4627 4628def SULD_2D_ARRAY_V2I8_ZERO 4629 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4630 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4631 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " 4632 "[$s, \\{$l, $x, $y, $y\\}];", 4633 []>; 4634def SULD_2D_ARRAY_V2I16_ZERO 4635 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4636 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4637 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " 4638 "[$s, \\{$l, $x, $y, $y\\}];", 4639 []>; 4640def SULD_2D_ARRAY_V2I32_ZERO 4641 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4642 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4643 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " 4644 "[$s, \\{$l, $x, $y, $y\\}];", 4645 []>; 4646def SULD_2D_ARRAY_V2I64_ZERO 4647 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4648 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4649 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " 4650 "[$s, \\{$l, $x, $y, $y\\}];", 4651 []>; 4652 4653def SULD_3D_V2I8_ZERO 4654 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4655 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4656 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4657 []>; 4658def SULD_3D_V2I16_ZERO 4659 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4660 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4661 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4662 []>; 4663def SULD_3D_V2I32_ZERO 4664 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4665 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4666 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4667 []>; 4668def SULD_3D_V2I64_ZERO 4669 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4670 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4671 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, 
$z\\}];", 4672 []>; 4673} 4674 4675let IsSuld = 3 in { 4676def SULD_1D_V4I8_ZERO 4677 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4678 (ins Int64Regs:$s, Int32Regs:$x), 4679 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4680 []>; 4681def SULD_1D_V4I16_ZERO 4682 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4683 (ins Int64Regs:$s, Int32Regs:$x), 4684 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4685 []>; 4686def SULD_1D_V4I32_ZERO 4687 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4688 (ins Int64Regs:$s, Int32Regs:$x), 4689 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4690 []>; 4691 4692def SULD_1D_ARRAY_V4I8_ZERO 4693 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4694 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4695 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4696 "[$s, \\{$l, $x\\}];", 4697 []>; 4698def SULD_1D_ARRAY_V4I16_ZERO 4699 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4700 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4701 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4702 "[$s, \\{$l, $x\\}];", 4703 []>; 4704def SULD_1D_ARRAY_V4I32_ZERO 4705 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4706 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4707 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4708 "[$s, \\{$l, $x\\}];", 4709 []>; 4710 4711def SULD_2D_V4I8_ZERO 4712 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4713 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4714 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4715 []>; 4716def SULD_2D_V4I16_ZERO 4717 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4718 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4719 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4720 []>; 4721def 
SULD_2D_V4I32_ZERO 4722 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4723 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4724 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4725 []>; 4726 4727def SULD_2D_ARRAY_V4I8_ZERO 4728 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4729 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4730 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4731 "[$s, \\{$l, $x, $y, $y\\}];", 4732 []>; 4733def SULD_2D_ARRAY_V4I16_ZERO 4734 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4735 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4736 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4737 "[$s, \\{$l, $x, $y, $y\\}];", 4738 []>; 4739def SULD_2D_ARRAY_V4I32_ZERO 4740 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4741 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4742 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4743 "[$s, \\{$l, $x, $y, $y\\}];", 4744 []>; 4745 4746 4747def SULD_3D_V4I8_ZERO 4748 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4749 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4750 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4751 "[$s, \\{$x, $y, $z, $z\\}];", 4752 []>; 4753def SULD_3D_V4I16_ZERO 4754 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4755 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4756 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4757 "[$s, \\{$x, $y, $z, $z\\}];", 4758 []>; 4759def SULD_3D_V4I32_ZERO 4760 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4761 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4762 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4763 "[$s, \\{$x, $y, $z, $z\\}];", 4764 []>; 4765} 4766 4767//----------------------------------- 4768// Texture Query Intrinsics 
//-----------------------------------

// Common shape of every texture query instruction in this section: one
// Int64Regs input ($a), one Int32Regs result ($d), and no selection pattern
// attached to the instruction itself.  Only the assembly string varies.
class TXQ_B32_INST<string asmstr>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), asmstr, []>;

// One instruction per queried attribute.  All of them are tagged with
// IsSurfTexQuery through the enclosing let.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER
  : TXQ_B32_INST<"txq.channel_order.b32 \t$d, [$a];">;
def TXQ_CHANNEL_DATA_TYPE
  : TXQ_B32_INST<"txq.channel_data_type.b32 \t$d, [$a];">;
def TXQ_WIDTH
  : TXQ_B32_INST<"txq.width.b32 \t$d, [$a];">;
def TXQ_HEIGHT
  : TXQ_B32_INST<"txq.height.b32 \t$d, [$a];">;
def TXQ_DEPTH
  : TXQ_B32_INST<"txq.depth.b32 \t$d, [$a];">;
def TXQ_ARRAY_SIZE
  : TXQ_B32_INST<"txq.array_size.b32 \t$d, [$a];">;
def TXQ_NUM_SAMPLES
  : TXQ_B32_INST<"txq.num_samples.b32 \t$d, [$a];">;
def TXQ_NUM_MIPMAP_LEVELS
  : TXQ_B32_INST<"txq.num_mipmap_levels.b32 \t$d, [$a];">;
}

// Selection patterns: each int_nvvm_txq_* intrinsic is lowered to the
// instruction of the same name defined above.
def : Pat<
  (int_nvvm_txq_channel_order Int64Regs:$a),
  (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_channel_data_type Int64Regs:$a),
  (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_width Int64Regs:$a),
  (TXQ_WIDTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_height Int64Regs:$a),
  (TXQ_HEIGHT Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_depth Int64Regs:$a),
  (TXQ_DEPTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_array_size Int64Regs:$a),
  (TXQ_ARRAY_SIZE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_samples Int64Regs:$a),
  (TXQ_NUM_SAMPLES Int64Regs:$a)>;
def : Pat<
  (int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
  (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Common shape of every surface query instruction in this section: one
// Int64Regs input ($a), one Int32Regs result ($d), and no selection pattern
// attached to the instruction itself.  Only the assembly string varies.
class SUQ_B32_INST<string asmstr>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), asmstr, []>;

// One instruction per queried attribute.  All of them are tagged with
// IsSurfTexQuery through the enclosing let.
let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER
  : SUQ_B32_INST<"suq.channel_order.b32 \t$d, [$a];">;
def SUQ_CHANNEL_DATA_TYPE
  : SUQ_B32_INST<"suq.channel_data_type.b32 \t$d, [$a];">;
def SUQ_WIDTH
  : SUQ_B32_INST<"suq.width.b32 \t$d, [$a];">;
def SUQ_HEIGHT
  : SUQ_B32_INST<"suq.height.b32 \t$d, [$a];">;
def SUQ_DEPTH
  : SUQ_B32_INST<"suq.depth.b32 \t$d, [$a];">;
def SUQ_ARRAY_SIZE
  : SUQ_B32_INST<"suq.array_size.b32 \t$d, [$a];">;
}

// Selection patterns: each int_nvvm_suq_* intrinsic is lowered to the
// instruction of the same name defined above.
def : Pat<
  (int_nvvm_suq_channel_order Int64Regs:$a),
  (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_channel_data_type Int64Regs:$a),
  (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_width Int64Regs:$a),
  (SUQ_WIDTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_height Int64Regs:$a),
  (SUQ_HEIGHT Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_depth Int64Regs:$a),
  (SUQ_DEPTH Int64Regs:$a)>;
def : Pat<
  (int_nvvm_suq_array_size Int64Regs:$a),
  (SUQ_ARRAY_SIZE Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work

// Shape shared by the three istypep instructions: an Int64Regs input ($a), an
// Int1Regs result ($d), and a pattern that selects the instruction directly
// from the given intrinsic.
class ISTYPEP_INST<string asmstr, Intrinsic intr>
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), asmstr,
              [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER
  : ISTYPEP_INST<"istypep.samplerref \t$d, $a;", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE
  : ISTYPEP_INST<"istypep.surfref \t$d, $a;", int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE
  : ISTYPEP_INST<"istypep.texref \t$d, $a;", int_nvvm_istypep_texture>;

//===- Surface Stores -----------------------------------------------------===//

// Every store below has no outputs and an empty pattern list.  The operands
// are $s (Int64Regs), the coordinate $x (Int32Regs), and then one, two or
// four data registers ($r, $g, $b, $a).  Note that the .b8 and .b16 forms
// both take their data in Int16Regs.
// NOTE: this let scope is intentionally left open here; it continues with
// the remaining store definitions that follow.
let IsSust = true in {
// Unformatted
// .clamp variant
def SUST_B_1D_B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
    "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
    "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
    "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
    []>;
def SUST_B_1D_V2B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B16_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
    "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B32_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
    "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V2B64_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
    "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
    []>;
def SUST_B_1D_V4B8_CLAMP : NVPTXInst<
    (outs),
    (ins Int64Regs:$s, Int32Regs:$x,
         Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
    []>;
4936def SUST_B_1D_V4B16_CLAMP 4937 : NVPTXInst<(outs), 4938 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4939 Int16Regs:$b, Int16Regs:$a), 4940 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4941 []>; 4942def SUST_B_1D_V4B32_CLAMP 4943 : NVPTXInst<(outs), 4944 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 4945 Int32Regs:$b, Int32Regs:$a), 4946 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4947 []>; 4948 4949 4950def SUST_B_1D_ARRAY_B8_CLAMP 4951 : NVPTXInst<(outs), 4952 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4953 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4954 []>; 4955def SUST_B_1D_ARRAY_B16_CLAMP 4956 : NVPTXInst<(outs), 4957 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4958 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4959 []>; 4960def SUST_B_1D_ARRAY_B32_CLAMP 4961 : NVPTXInst<(outs), 4962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 4963 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4964 []>; 4965def SUST_B_1D_ARRAY_B64_CLAMP 4966 : NVPTXInst<(outs), 4967 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 4968 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4969 []>; 4970def SUST_B_1D_ARRAY_V2B8_CLAMP 4971 : NVPTXInst<(outs), 4972 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4973 Int16Regs:$g), 4974 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4975 []>; 4976def SUST_B_1D_ARRAY_V2B16_CLAMP 4977 : NVPTXInst<(outs), 4978 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4979 Int16Regs:$g), 4980 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4981 []>; 4982def SUST_B_1D_ARRAY_V2B32_CLAMP 4983 : NVPTXInst<(outs), 4984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4985 Int32Regs:$g), 4986 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4987 []>; 4988def 
SUST_B_1D_ARRAY_V2B64_CLAMP 4989 : NVPTXInst<(outs), 4990 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 4991 Int64Regs:$g), 4992 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4993 []>; 4994def SUST_B_1D_ARRAY_V4B8_CLAMP 4995 : NVPTXInst<(outs), 4996 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4997 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4998 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " 4999 "\\{$r, $g, $b, $a\\};", 5000 []>; 5001def SUST_B_1D_ARRAY_V4B16_CLAMP 5002 : NVPTXInst<(outs), 5003 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5004 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5005 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " 5006 "\\{$r, $g, $b, $a\\};", 5007 []>; 5008def SUST_B_1D_ARRAY_V4B32_CLAMP 5009 : NVPTXInst<(outs), 5010 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5011 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5012 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " 5013 "\\{$r, $g, $b, $a\\};", 5014 []>; 5015 5016 5017def SUST_B_2D_B8_CLAMP 5018 : NVPTXInst<(outs), 5019 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5020 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5021 []>; 5022def SUST_B_2D_B16_CLAMP 5023 : NVPTXInst<(outs), 5024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5025 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5026 []>; 5027def SUST_B_2D_B32_CLAMP 5028 : NVPTXInst<(outs), 5029 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5030 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5031 []>; 5032def SUST_B_2D_B64_CLAMP 5033 : NVPTXInst<(outs), 5034 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5035 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 5036 []>; 5037def SUST_B_2D_V2B8_CLAMP 5038 : NVPTXInst<(outs), 5039 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5040 Int16Regs:$g), 5041 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], 
\\{$r, $g\\};", 5042 []>; 5043def SUST_B_2D_V2B16_CLAMP 5044 : NVPTXInst<(outs), 5045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5046 Int16Regs:$g), 5047 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5048 []>; 5049def SUST_B_2D_V2B32_CLAMP 5050 : NVPTXInst<(outs), 5051 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5052 Int32Regs:$g), 5053 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5054 []>; 5055def SUST_B_2D_V2B64_CLAMP 5056 : NVPTXInst<(outs), 5057 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5058 Int64Regs:$g), 5059 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5060 []>; 5061def SUST_B_2D_V4B8_CLAMP 5062 : NVPTXInst<(outs), 5063 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5064 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5065 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " 5066 "\\{$r, $g, $b, $a\\};", 5067 []>; 5068def SUST_B_2D_V4B16_CLAMP 5069 : NVPTXInst<(outs), 5070 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5071 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5072 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " 5073 "\\{$r, $g, $b, $a\\};", 5074 []>; 5075def SUST_B_2D_V4B32_CLAMP 5076 : NVPTXInst<(outs), 5077 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5078 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5079 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " 5080 "\\{$r, $g, $b, $a\\};", 5081 []>; 5082 5083 5084def SUST_B_2D_ARRAY_B8_CLAMP 5085 : NVPTXInst<(outs), 5086 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5087 Int16Regs:$r), 5088 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5089 []>; 5090def SUST_B_2D_ARRAY_B16_CLAMP 5091 : NVPTXInst<(outs), 5092 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5093 Int16Regs:$r), 5094 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5095 []>; 5096def SUST_B_2D_ARRAY_B32_CLAMP 5097 : NVPTXInst<(outs), 5098 (ins 
Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5099 Int32Regs:$r), 5100 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5101 []>; 5102def SUST_B_2D_ARRAY_B64_CLAMP 5103 : NVPTXInst<(outs), 5104 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5105 Int64Regs:$r), 5106 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5107 []>; 5108def SUST_B_2D_ARRAY_V2B8_CLAMP 5109 : NVPTXInst<(outs), 5110 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5111 Int16Regs:$r, Int16Regs:$g), 5112 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5113 "\\{$r, $g\\};", 5114 []>; 5115def SUST_B_2D_ARRAY_V2B16_CLAMP 5116 : NVPTXInst<(outs), 5117 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5118 Int16Regs:$r, Int16Regs:$g), 5119 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5120 "\\{$r, $g\\};", 5121 []>; 5122def SUST_B_2D_ARRAY_V2B32_CLAMP 5123 : NVPTXInst<(outs), 5124 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5125 Int32Regs:$r, Int32Regs:$g), 5126 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5127 "\\{$r, $g\\};", 5128 []>; 5129def SUST_B_2D_ARRAY_V2B64_CLAMP 5130 : NVPTXInst<(outs), 5131 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5132 Int64Regs:$r, Int64Regs:$g), 5133 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5134 "\\{$r, $g\\};", 5135 []>; 5136def SUST_B_2D_ARRAY_V4B8_CLAMP 5137 : NVPTXInst<(outs), 5138 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5139 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5140 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5141 "\\{$r, $g, $b, $a\\};", 5142 []>; 5143def SUST_B_2D_ARRAY_V4B16_CLAMP 5144 : NVPTXInst<(outs), 5145 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5146 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5147 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5148 "\\{$r, $g, $b, $a\\};", 
5149 []>; 5150def SUST_B_2D_ARRAY_V4B32_CLAMP 5151 : NVPTXInst<(outs), 5152 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5153 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5154 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 5155 "\\{$r, $g, $b, $a\\};", 5156 []>; 5157 5158 5159def SUST_B_3D_B8_CLAMP 5160 : NVPTXInst<(outs), 5161 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5162 Int16Regs:$r), 5163 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5164 []>; 5165def SUST_B_3D_B16_CLAMP 5166 : NVPTXInst<(outs), 5167 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5168 Int16Regs:$r), 5169 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5170 []>; 5171def SUST_B_3D_B32_CLAMP 5172 : NVPTXInst<(outs), 5173 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5174 Int32Regs:$r), 5175 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5176 []>; 5177def SUST_B_3D_B64_CLAMP 5178 : NVPTXInst<(outs), 5179 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5180 Int64Regs:$r), 5181 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5182 []>; 5183def SUST_B_3D_V2B8_CLAMP 5184 : NVPTXInst<(outs), 5185 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5186 Int16Regs:$r, Int16Regs:$g), 5187 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5188 "\\{$r, $g\\};", 5189 []>; 5190def SUST_B_3D_V2B16_CLAMP 5191 : NVPTXInst<(outs), 5192 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5193 Int16Regs:$r, Int16Regs:$g), 5194 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5195 "\\{$r, $g\\};", 5196 []>; 5197def SUST_B_3D_V2B32_CLAMP 5198 : NVPTXInst<(outs), 5199 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5200 Int32Regs:$r, Int32Regs:$g), 5201 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5202 "\\{$r, $g\\};", 5203 []>; 5204def SUST_B_3D_V2B64_CLAMP 5205 : NVPTXInst<(outs), 5206 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5207 Int64Regs:$r, Int64Regs:$g), 5208 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5209 "\\{$r, $g\\};", 5210 []>; 5211def SUST_B_3D_V4B8_CLAMP 5212 : NVPTXInst<(outs), 5213 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5215 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5216 "\\{$r, $g, $b, $a\\};", 5217 []>; 5218def SUST_B_3D_V4B16_CLAMP 5219 : NVPTXInst<(outs), 5220 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5221 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5222 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5223 "\\{$r, $g, $b, $a\\};", 5224 []>; 5225def SUST_B_3D_V4B32_CLAMP 5226 : NVPTXInst<(outs), 5227 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5228 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5229 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5230 "\\{$r, $g, $b, $a\\};", 5231 []>; 5232 5233 5234// .trap variant 5235def SUST_B_1D_B8_TRAP 5236 : NVPTXInst<(outs), 5237 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5238 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5239 []>; 5240def SUST_B_1D_B16_TRAP 5241 : NVPTXInst<(outs), 5242 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5243 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5244 []>; 5245def SUST_B_1D_B32_TRAP 5246 : NVPTXInst<(outs), 5247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5248 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5249 []>; 5250def SUST_B_1D_B64_TRAP 5251 : NVPTXInst<(outs), 5252 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5253 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", 5254 []>; 5255def SUST_B_1D_V2B8_TRAP 5256 : NVPTXInst<(outs), 5257 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5258 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5259 []>; 5260def SUST_B_1D_V2B16_TRAP 5261 : NVPTXInst<(outs), 5262 (ins 
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5263 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5264 []>; 5265def SUST_B_1D_V2B32_TRAP 5266 : NVPTXInst<(outs), 5267 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5268 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5269 []>; 5270def SUST_B_1D_V2B64_TRAP 5271 : NVPTXInst<(outs), 5272 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5273 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5274 []>; 5275def SUST_B_1D_V4B8_TRAP 5276 : NVPTXInst<(outs), 5277 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5278 Int16Regs:$b, Int16Regs:$a), 5279 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5280 []>; 5281def SUST_B_1D_V4B16_TRAP 5282 : NVPTXInst<(outs), 5283 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5284 Int16Regs:$b, Int16Regs:$a), 5285 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5286 []>; 5287def SUST_B_1D_V4B32_TRAP 5288 : NVPTXInst<(outs), 5289 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5290 Int32Regs:$b, Int32Regs:$a), 5291 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5292 []>; 5293 5294 5295def SUST_B_1D_ARRAY_B8_TRAP 5296 : NVPTXInst<(outs), 5297 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5298 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5299 []>; 5300def SUST_B_1D_ARRAY_B16_TRAP 5301 : NVPTXInst<(outs), 5302 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5303 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5304 []>; 5305def SUST_B_1D_ARRAY_B32_TRAP 5306 : NVPTXInst<(outs), 5307 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5308 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5309 []>; 5310def SUST_B_1D_ARRAY_B64_TRAP 5311 : NVPTXInst<(outs), 5312 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5313 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], 
\\{$r\\};", 5314 []>; 5315def SUST_B_1D_ARRAY_V2B8_TRAP 5316 : NVPTXInst<(outs), 5317 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5318 Int16Regs:$g), 5319 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5320 []>; 5321def SUST_B_1D_ARRAY_V2B16_TRAP 5322 : NVPTXInst<(outs), 5323 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5324 Int16Regs:$g), 5325 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5326 []>; 5327def SUST_B_1D_ARRAY_V2B32_TRAP 5328 : NVPTXInst<(outs), 5329 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5330 Int32Regs:$g), 5331 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5332 []>; 5333def SUST_B_1D_ARRAY_V2B64_TRAP 5334 : NVPTXInst<(outs), 5335 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5336 Int64Regs:$g), 5337 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5338 []>; 5339def SUST_B_1D_ARRAY_V4B8_TRAP 5340 : NVPTXInst<(outs), 5341 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5342 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5343 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5344 "\\{$r, $g, $b, $a\\};", 5345 []>; 5346def SUST_B_1D_ARRAY_V4B16_TRAP 5347 : NVPTXInst<(outs), 5348 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5349 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5350 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5351 "\\{$r, $g, $b, $a\\};", 5352 []>; 5353def SUST_B_1D_ARRAY_V4B32_TRAP 5354 : NVPTXInst<(outs), 5355 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5356 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5357 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5358 "\\{$r, $g, $b, $a\\};", 5359 []>; 5360 5361 5362def SUST_B_2D_B8_TRAP 5363 : NVPTXInst<(outs), 5364 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5365 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5366 []>; 5367def SUST_B_2D_B16_TRAP 5368 : NVPTXInst<(outs), 5369 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5370 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5371 []>; 5372def SUST_B_2D_B32_TRAP 5373 : NVPTXInst<(outs), 5374 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5375 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5376 []>; 5377def SUST_B_2D_B64_TRAP 5378 : NVPTXInst<(outs), 5379 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5380 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5381 []>; 5382def SUST_B_2D_V2B8_TRAP 5383 : NVPTXInst<(outs), 5384 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5385 Int16Regs:$g), 5386 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5387 []>; 5388def SUST_B_2D_V2B16_TRAP 5389 : NVPTXInst<(outs), 5390 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5391 Int16Regs:$g), 5392 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5393 []>; 5394def SUST_B_2D_V2B32_TRAP 5395 : NVPTXInst<(outs), 5396 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5397 Int32Regs:$g), 5398 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5399 []>; 5400def SUST_B_2D_V2B64_TRAP 5401 : NVPTXInst<(outs), 5402 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5403 Int64Regs:$g), 5404 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5405 []>; 5406def SUST_B_2D_V4B8_TRAP 5407 : NVPTXInst<(outs), 5408 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5409 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5410 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5411 "\\{$r, $g, $b, $a\\};", 5412 []>; 5413def SUST_B_2D_V4B16_TRAP 5414 : NVPTXInst<(outs), 5415 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5416 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5417 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5418 "\\{$r, $g, $b, $a\\};", 5419 []>; 5420def SUST_B_2D_V4B32_TRAP 5421 : NVPTXInst<(outs), 5422 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$r, 5423 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5424 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5425 "\\{$r, $g, $b, $a\\};", 5426 []>; 5427 5428 5429def SUST_B_2D_ARRAY_B8_TRAP 5430 : NVPTXInst<(outs), 5431 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5432 Int16Regs:$r), 5433 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5434 []>; 5435def SUST_B_2D_ARRAY_B16_TRAP 5436 : NVPTXInst<(outs), 5437 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5438 Int16Regs:$r), 5439 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5440 []>; 5441def SUST_B_2D_ARRAY_B32_TRAP 5442 : NVPTXInst<(outs), 5443 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5444 Int32Regs:$r), 5445 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5446 []>; 5447def SUST_B_2D_ARRAY_B64_TRAP 5448 : NVPTXInst<(outs), 5449 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5450 Int64Regs:$r), 5451 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5452 []>; 5453def SUST_B_2D_ARRAY_V2B8_TRAP 5454 : NVPTXInst<(outs), 5455 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5456 Int16Regs:$r, Int16Regs:$g), 5457 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5458 "\\{$r, $g\\};", 5459 []>; 5460def SUST_B_2D_ARRAY_V2B16_TRAP 5461 : NVPTXInst<(outs), 5462 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5463 Int16Regs:$r, Int16Regs:$g), 5464 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5465 "\\{$r, $g\\};", 5466 []>; 5467def SUST_B_2D_ARRAY_V2B32_TRAP 5468 : NVPTXInst<(outs), 5469 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5470 Int32Regs:$r, Int32Regs:$g), 5471 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5472 "\\{$r, $g\\};", 5473 []>; 5474def SUST_B_2D_ARRAY_V2B64_TRAP 5475 : NVPTXInst<(outs), 5476 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5477 Int64Regs:$r, Int64Regs:$g), 
5478 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5479 "\\{$r, $g\\};", 5480 []>; 5481def SUST_B_2D_ARRAY_V4B8_TRAP 5482 : NVPTXInst<(outs), 5483 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5484 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5485 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5486 "\\{$r, $g, $b, $a\\};", 5487 []>; 5488def SUST_B_2D_ARRAY_V4B16_TRAP 5489 : NVPTXInst<(outs), 5490 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5491 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5492 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5493 "\\{$r, $g, $b, $a\\};", 5494 []>; 5495def SUST_B_2D_ARRAY_V4B32_TRAP 5496 : NVPTXInst<(outs), 5497 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5498 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5499 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5500 "\\{$r, $g, $b, $a\\};", 5501 []>; 5502 5503 5504def SUST_B_3D_B8_TRAP 5505 : NVPTXInst<(outs), 5506 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5507 Int16Regs:$r), 5508 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5509 []>; 5510def SUST_B_3D_B16_TRAP 5511 : NVPTXInst<(outs), 5512 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5513 Int16Regs:$r), 5514 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5515 []>; 5516def SUST_B_3D_B32_TRAP 5517 : NVPTXInst<(outs), 5518 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5519 Int32Regs:$r), 5520 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5521 []>; 5522def SUST_B_3D_B64_TRAP 5523 : NVPTXInst<(outs), 5524 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5525 Int64Regs:$r), 5526 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5527 []>; 5528def SUST_B_3D_V2B8_TRAP 5529 : NVPTXInst<(outs), 5530 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5531 Int16Regs:$r, Int16Regs:$g), 5532 
"sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5533 "\\{$r, $g\\};", 5534 []>; 5535def SUST_B_3D_V2B16_TRAP 5536 : NVPTXInst<(outs), 5537 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5538 Int16Regs:$r, Int16Regs:$g), 5539 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5540 "\\{$r, $g\\};", 5541 []>; 5542def SUST_B_3D_V2B32_TRAP 5543 : NVPTXInst<(outs), 5544 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5545 Int32Regs:$r, Int32Regs:$g), 5546 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5547 "\\{$r, $g\\};", 5548 []>; 5549def SUST_B_3D_V2B64_TRAP 5550 : NVPTXInst<(outs), 5551 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5552 Int64Regs:$r, Int64Regs:$g), 5553 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5554 "\\{$r, $g\\};", 5555 []>; 5556def SUST_B_3D_V4B8_TRAP 5557 : NVPTXInst<(outs), 5558 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5559 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5560 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5561 "\\{$r, $g, $b, $a\\};", 5562 []>; 5563def SUST_B_3D_V4B16_TRAP 5564 : NVPTXInst<(outs), 5565 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5566 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5567 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5568 "\\{$r, $g, $b, $a\\};", 5569 []>; 5570def SUST_B_3D_V4B32_TRAP 5571 : NVPTXInst<(outs), 5572 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5573 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5574 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5575 "\\{$r, $g, $b, $a\\};", 5576 []>; 5577 5578 5579// .zero variant 5580def SUST_B_1D_B8_ZERO 5581 : NVPTXInst<(outs), 5582 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5583 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", 5584 []>; 5585def SUST_B_1D_B16_ZERO 5586 : NVPTXInst<(outs), 5587 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5588 "sust.b.1d.b16.zero 
\t[$s, \\{$x\\}], \\{$r\\};", 5589 []>; 5590def SUST_B_1D_B32_ZERO 5591 : NVPTXInst<(outs), 5592 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5593 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", 5594 []>; 5595def SUST_B_1D_B64_ZERO 5596 : NVPTXInst<(outs), 5597 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5598 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", 5599 []>; 5600def SUST_B_1D_V2B8_ZERO 5601 : NVPTXInst<(outs), 5602 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5603 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5604 []>; 5605def SUST_B_1D_V2B16_ZERO 5606 : NVPTXInst<(outs), 5607 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5608 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5609 []>; 5610def SUST_B_1D_V2B32_ZERO 5611 : NVPTXInst<(outs), 5612 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5613 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5614 []>; 5615def SUST_B_1D_V2B64_ZERO 5616 : NVPTXInst<(outs), 5617 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5618 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5619 []>; 5620def SUST_B_1D_V4B8_ZERO 5621 : NVPTXInst<(outs), 5622 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5623 Int16Regs:$b, Int16Regs:$a), 5624 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5625 []>; 5626def SUST_B_1D_V4B16_ZERO 5627 : NVPTXInst<(outs), 5628 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5629 Int16Regs:$b, Int16Regs:$a), 5630 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5631 []>; 5632def SUST_B_1D_V4B32_ZERO 5633 : NVPTXInst<(outs), 5634 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5635 Int32Regs:$b, Int32Regs:$a), 5636 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5637 []>; 5638 5639 5640def SUST_B_1D_ARRAY_B8_ZERO 5641 : NVPTXInst<(outs), 5642 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5643 
"sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5644 []>; 5645def SUST_B_1D_ARRAY_B16_ZERO 5646 : NVPTXInst<(outs), 5647 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5648 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5649 []>; 5650def SUST_B_1D_ARRAY_B32_ZERO 5651 : NVPTXInst<(outs), 5652 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5653 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5654 []>; 5655def SUST_B_1D_ARRAY_B64_ZERO 5656 : NVPTXInst<(outs), 5657 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5658 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5659 []>; 5660def SUST_B_1D_ARRAY_V2B8_ZERO 5661 : NVPTXInst<(outs), 5662 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5663 Int16Regs:$g), 5664 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5665 []>; 5666def SUST_B_1D_ARRAY_V2B16_ZERO 5667 : NVPTXInst<(outs), 5668 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5669 Int16Regs:$g), 5670 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5671 []>; 5672def SUST_B_1D_ARRAY_V2B32_ZERO 5673 : NVPTXInst<(outs), 5674 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5675 Int32Regs:$g), 5676 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5677 []>; 5678def SUST_B_1D_ARRAY_V2B64_ZERO 5679 : NVPTXInst<(outs), 5680 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5681 Int64Regs:$g), 5682 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5683 []>; 5684def SUST_B_1D_ARRAY_V4B8_ZERO 5685 : NVPTXInst<(outs), 5686 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5687 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5688 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " 5689 "\\{$r, $g, $b, $a\\};", 5690 []>; 5691def SUST_B_1D_ARRAY_V4B16_ZERO 5692 : NVPTXInst<(outs), 5693 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5694 Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a), 5695 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " 5696 "\\{$r, $g, $b, $a\\};", 5697 []>; 5698def SUST_B_1D_ARRAY_V4B32_ZERO 5699 : NVPTXInst<(outs), 5700 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5701 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5702 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " 5703 "\\{$r, $g, $b, $a\\};", 5704 []>; 5705 5706 5707def SUST_B_2D_B8_ZERO 5708 : NVPTXInst<(outs), 5709 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5710 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5711 []>; 5712def SUST_B_2D_B16_ZERO 5713 : NVPTXInst<(outs), 5714 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5715 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5716 []>; 5717def SUST_B_2D_B32_ZERO 5718 : NVPTXInst<(outs), 5719 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5720 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5721 []>; 5722def SUST_B_2D_B64_ZERO 5723 : NVPTXInst<(outs), 5724 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5725 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5726 []>; 5727def SUST_B_2D_V2B8_ZERO 5728 : NVPTXInst<(outs), 5729 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5730 Int16Regs:$g), 5731 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5732 []>; 5733def SUST_B_2D_V2B16_ZERO 5734 : NVPTXInst<(outs), 5735 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5736 Int16Regs:$g), 5737 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5738 []>; 5739def SUST_B_2D_V2B32_ZERO 5740 : NVPTXInst<(outs), 5741 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5742 Int32Regs:$g), 5743 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5744 []>; 5745def SUST_B_2D_V2B64_ZERO 5746 : NVPTXInst<(outs), 5747 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5748 Int64Regs:$g), 5749 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5750 
[]>; 5751def SUST_B_2D_V4B8_ZERO 5752 : NVPTXInst<(outs), 5753 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5754 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5755 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " 5756 "\\{$r, $g, $b, $a\\};", 5757 []>; 5758def SUST_B_2D_V4B16_ZERO 5759 : NVPTXInst<(outs), 5760 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5761 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5762 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " 5763 "\\{$r, $g, $b, $a\\};", 5764 []>; 5765def SUST_B_2D_V4B32_ZERO 5766 : NVPTXInst<(outs), 5767 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5768 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5769 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " 5770 "\\{$r, $g, $b, $a\\};", 5771 []>; 5772 5773 5774def SUST_B_2D_ARRAY_B8_ZERO 5775 : NVPTXInst<(outs), 5776 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5777 Int16Regs:$r), 5778 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5779 []>; 5780def SUST_B_2D_ARRAY_B16_ZERO 5781 : NVPTXInst<(outs), 5782 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5783 Int16Regs:$r), 5784 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5785 []>; 5786def SUST_B_2D_ARRAY_B32_ZERO 5787 : NVPTXInst<(outs), 5788 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5789 Int32Regs:$r), 5790 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5791 []>; 5792def SUST_B_2D_ARRAY_B64_ZERO 5793 : NVPTXInst<(outs), 5794 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5795 Int64Regs:$r), 5796 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5797 []>; 5798def SUST_B_2D_ARRAY_V2B8_ZERO 5799 : NVPTXInst<(outs), 5800 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5801 Int16Regs:$r, Int16Regs:$g), 5802 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5803 "\\{$r, $g\\};", 5804 []>; 5805def SUST_B_2D_ARRAY_V2B16_ZERO 5806 : 
NVPTXInst<(outs), 5807 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5808 Int16Regs:$r, Int16Regs:$g), 5809 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5810 "\\{$r, $g\\};", 5811 []>; 5812def SUST_B_2D_ARRAY_V2B32_ZERO 5813 : NVPTXInst<(outs), 5814 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5815 Int32Regs:$r, Int32Regs:$g), 5816 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5817 "\\{$r, $g\\};", 5818 []>; 5819def SUST_B_2D_ARRAY_V2B64_ZERO 5820 : NVPTXInst<(outs), 5821 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5822 Int64Regs:$r, Int64Regs:$g), 5823 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5824 "\\{$r, $g\\};", 5825 []>; 5826def SUST_B_2D_ARRAY_V4B8_ZERO 5827 : NVPTXInst<(outs), 5828 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5829 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5830 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5831 "\\{$r, $g, $b, $a\\};", 5832 []>; 5833def SUST_B_2D_ARRAY_V4B16_ZERO 5834 : NVPTXInst<(outs), 5835 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5836 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5837 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5838 "\\{$r, $g, $b, $a\\};", 5839 []>; 5840def SUST_B_2D_ARRAY_V4B32_ZERO 5841 : NVPTXInst<(outs), 5842 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5843 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5844 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5845 "\\{$r, $g, $b, $a\\};", 5846 []>; 5847 5848 5849def SUST_B_3D_B8_ZERO 5850 : NVPTXInst<(outs), 5851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5852 Int16Regs:$r), 5853 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5854 []>; 5855def SUST_B_3D_B16_ZERO 5856 : NVPTXInst<(outs), 5857 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5858 Int16Regs:$r), 5859 "sust.b.3d.b16.zero \t[$s, 
\\{$x, $y, $z, $z\\}], \\{$r\\};", 5860 []>; 5861def SUST_B_3D_B32_ZERO 5862 : NVPTXInst<(outs), 5863 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5864 Int32Regs:$r), 5865 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5866 []>; 5867def SUST_B_3D_B64_ZERO 5868 : NVPTXInst<(outs), 5869 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5870 Int64Regs:$r), 5871 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5872 []>; 5873def SUST_B_3D_V2B8_ZERO 5874 : NVPTXInst<(outs), 5875 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5876 Int16Regs:$r, Int16Regs:$g), 5877 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5878 "\\{$r, $g\\};", 5879 []>; 5880def SUST_B_3D_V2B16_ZERO 5881 : NVPTXInst<(outs), 5882 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5883 Int16Regs:$r, Int16Regs:$g), 5884 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5885 "\\{$r, $g\\};", 5886 []>; 5887def SUST_B_3D_V2B32_ZERO 5888 : NVPTXInst<(outs), 5889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5890 Int32Regs:$r, Int32Regs:$g), 5891 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5892 "\\{$r, $g\\};", 5893 []>; 5894def SUST_B_3D_V2B64_ZERO 5895 : NVPTXInst<(outs), 5896 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5897 Int64Regs:$r, Int64Regs:$g), 5898 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5899 "\\{$r, $g\\};", 5900 []>; 5901def SUST_B_3D_V4B8_ZERO 5902 : NVPTXInst<(outs), 5903 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5904 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5905 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5906 "\\{$r, $g, $b, $a\\};", 5907 []>; 5908def SUST_B_3D_V4B16_ZERO 5909 : NVPTXInst<(outs), 5910 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5911 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5912 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5913 "\\{$r, $g, $b, 
$a\\};", 5914 []>; 5915def SUST_B_3D_V4B32_ZERO 5916 : NVPTXInst<(outs), 5917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5918 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5919 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5920 "\\{$r, $g, $b, $a\\};", 5921 []>; 5922 5923 5924 5925// Formatted 5926 5927def SUST_P_1D_B8_TRAP 5928 : NVPTXInst<(outs), 5929 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5930 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5931 []>; 5932def SUST_P_1D_B16_TRAP 5933 : NVPTXInst<(outs), 5934 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5935 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5936 []>; 5937def SUST_P_1D_B32_TRAP 5938 : NVPTXInst<(outs), 5939 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5940 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5941 []>; 5942def SUST_P_1D_V2B8_TRAP 5943 : NVPTXInst<(outs), 5944 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5945 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5946 []>; 5947def SUST_P_1D_V2B16_TRAP 5948 : NVPTXInst<(outs), 5949 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5950 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5951 []>; 5952def SUST_P_1D_V2B32_TRAP 5953 : NVPTXInst<(outs), 5954 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5955 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5956 []>; 5957def SUST_P_1D_V4B8_TRAP 5958 : NVPTXInst<(outs), 5959 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5960 Int16Regs:$b, Int16Regs:$a), 5961 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5962 []>; 5963def SUST_P_1D_V4B16_TRAP 5964 : NVPTXInst<(outs), 5965 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5966 Int16Regs:$b, Int16Regs:$a), 5967 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5968 []>; 5969def SUST_P_1D_V4B32_TRAP 5970 : NVPTXInst<(outs), 5971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, 
Int32Regs:$g, 5972 Int32Regs:$b, Int32Regs:$a), 5973 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5974 []>; 5975 5976 5977def SUST_P_1D_ARRAY_B8_TRAP 5978 : NVPTXInst<(outs), 5979 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5980 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5981 []>; 5982def SUST_P_1D_ARRAY_B16_TRAP 5983 : NVPTXInst<(outs), 5984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5985 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5986 []>; 5987def SUST_P_1D_ARRAY_B32_TRAP 5988 : NVPTXInst<(outs), 5989 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5990 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5991 []>; 5992def SUST_P_1D_ARRAY_V2B8_TRAP 5993 : NVPTXInst<(outs), 5994 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5995 Int16Regs:$g), 5996 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5997 []>; 5998def SUST_P_1D_ARRAY_V2B16_TRAP 5999 : NVPTXInst<(outs), 6000 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 6001 Int16Regs:$g), 6002 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 6003 []>; 6004def SUST_P_1D_ARRAY_V2B32_TRAP 6005 : NVPTXInst<(outs), 6006 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 6007 Int32Regs:$g), 6008 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 6009 []>; 6010def SUST_P_1D_ARRAY_V4B8_TRAP 6011 : NVPTXInst<(outs), 6012 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 6013 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6014 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 6015 "\\{$r, $g, $b, $a\\};", 6016 []>; 6017def SUST_P_1D_ARRAY_V4B16_TRAP 6018 : NVPTXInst<(outs), 6019 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 6020 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6021 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 6022 "\\{$r, $g, $b, $a\\};", 6023 []>; 6024def SUST_P_1D_ARRAY_V4B32_TRAP 6025 : 
NVPTXInst<(outs), 6026 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 6027 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6028 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 6029 "\\{$r, $g, $b, $a\\};", 6030 []>; 6031 6032 6033def SUST_P_2D_B8_TRAP 6034 : NVPTXInst<(outs), 6035 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6036 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 6037 []>; 6038def SUST_P_2D_B16_TRAP 6039 : NVPTXInst<(outs), 6040 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6041 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 6042 []>; 6043def SUST_P_2D_B32_TRAP 6044 : NVPTXInst<(outs), 6045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6046 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 6047 []>; 6048def SUST_P_2D_V2B8_TRAP 6049 : NVPTXInst<(outs), 6050 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 6051 Int16Regs:$g), 6052 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 6053 []>; 6054def SUST_P_2D_V2B16_TRAP 6055 : NVPTXInst<(outs), 6056 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 6057 Int16Regs:$g), 6058 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 6059 []>; 6060def SUST_P_2D_V2B32_TRAP 6061 : NVPTXInst<(outs), 6062 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6063 Int32Regs:$g), 6064 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 6065 []>; 6066def SUST_P_2D_V4B8_TRAP 6067 : NVPTXInst<(outs), 6068 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 6069 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6070 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 6071 "\\{$r, $g, $b, $a\\};", 6072 []>; 6073def SUST_P_2D_V4B16_TRAP 6074 : NVPTXInst<(outs), 6075 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 6076 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6077 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 6078 "\\{$r, $g, $b, $a\\};", 6079 []>; 6080def SUST_P_2D_V4B32_TRAP 6081 
: NVPTXInst<(outs), 6082 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6083 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6084 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 6085 "\\{$r, $g, $b, $a\\};", 6086 []>; 6087 6088 6089def SUST_P_2D_ARRAY_B8_TRAP 6090 : NVPTXInst<(outs), 6091 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6092 Int16Regs:$r), 6093 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 6094 []>; 6095def SUST_P_2D_ARRAY_B16_TRAP 6096 : NVPTXInst<(outs), 6097 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6098 Int16Regs:$r), 6099 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 6100 []>; 6101def SUST_P_2D_ARRAY_B32_TRAP 6102 : NVPTXInst<(outs), 6103 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6104 Int32Regs:$r), 6105 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 6106 []>; 6107def SUST_P_2D_ARRAY_V2B8_TRAP 6108 : NVPTXInst<(outs), 6109 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6110 Int16Regs:$r, Int16Regs:$g), 6111 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6112 "\\{$r, $g\\};", 6113 []>; 6114def SUST_P_2D_ARRAY_V2B16_TRAP 6115 : NVPTXInst<(outs), 6116 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6117 Int16Regs:$r, Int16Regs:$g), 6118 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6119 "\\{$r, $g\\};", 6120 []>; 6121def SUST_P_2D_ARRAY_V2B32_TRAP 6122 : NVPTXInst<(outs), 6123 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6124 Int32Regs:$r, Int32Regs:$g), 6125 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6126 "\\{$r, $g\\};", 6127 []>; 6128def SUST_P_2D_ARRAY_V4B8_TRAP 6129 : NVPTXInst<(outs), 6130 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6131 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6132 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6133 "\\{$r, $g, $b, $a\\};", 6134 []>; 6135def 
SUST_P_2D_ARRAY_V4B16_TRAP 6136 : NVPTXInst<(outs), 6137 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6138 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6139 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6140 "\\{$r, $g, $b, $a\\};", 6141 []>; 6142def SUST_P_2D_ARRAY_V4B32_TRAP 6143 : NVPTXInst<(outs), 6144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 6145 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6146 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 6147 "\\{$r, $g, $b, $a\\};", 6148 []>; 6149 6150 6151def SUST_P_3D_B8_TRAP 6152 : NVPTXInst<(outs), 6153 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6154 Int16Regs:$r), 6155 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 6156 []>; 6157def SUST_P_3D_B16_TRAP 6158 : NVPTXInst<(outs), 6159 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6160 Int16Regs:$r), 6161 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 6162 []>; 6163def SUST_P_3D_B32_TRAP 6164 : NVPTXInst<(outs), 6165 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6166 Int32Regs:$r), 6167 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 6168 []>; 6169def SUST_P_3D_V2B8_TRAP 6170 : NVPTXInst<(outs), 6171 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6172 Int16Regs:$r, Int16Regs:$g), 6173 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6174 "\\{$r, $g\\};", 6175 []>; 6176def SUST_P_3D_V2B16_TRAP 6177 : NVPTXInst<(outs), 6178 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6179 Int16Regs:$r, Int16Regs:$g), 6180 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6181 "\\{$r, $g\\};", 6182 []>; 6183def SUST_P_3D_V2B32_TRAP 6184 : NVPTXInst<(outs), 6185 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6186 Int32Regs:$r, Int32Regs:$g), 6187 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6188 "\\{$r, $g\\};", 6189 []>; 6190def SUST_P_3D_V4B8_TRAP 6191 
: NVPTXInst<(outs), 6192 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6193 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6194 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6195 "\\{$r, $g, $b, $a\\};", 6196 []>; 6197def SUST_P_3D_V4B16_TRAP 6198 : NVPTXInst<(outs), 6199 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6200 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6201 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6202 "\\{$r, $g, $b, $a\\};", 6203 []>; 6204def SUST_P_3D_V4B32_TRAP 6205 : NVPTXInst<(outs), 6206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6207 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6208 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6209 "\\{$r, $g, $b, $a\\};", 6210 []>; 6211} 6212 6213// Surface store instruction patterns 6214// I'm not sure why we can't just include these in the instruction definitions, 6215// but TableGen complains of type errors :( 6216 6217// .clamp variant 6218def : Pat<(int_nvvm_sust_b_1d_i8_clamp 6219 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6220 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6221 6222def : Pat<(int_nvvm_sust_b_1d_i16_clamp 6223 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6224 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6225 6226def : Pat<(int_nvvm_sust_b_1d_i32_clamp 6227 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6228 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6229 6230def : Pat<(int_nvvm_sust_b_1d_i64_clamp 6231 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6232 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6233 6234def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 6235 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6236 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6237 Int16Regs:$r, Int16Regs:$g)>; 6238 6239def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 6240 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 
6241 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6242 Int16Regs:$r, Int16Regs:$g)>; 6243 6244def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 6245 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6246 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6247 Int32Regs:$r, Int32Regs:$g)>; 6248 6249def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 6250 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6251 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, 6252 Int64Regs:$r, Int64Regs:$g)>; 6253 6254def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 6255 Int64Regs:$s, Int32Regs:$x, 6256 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6257 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6258 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6259 6260def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 6261 Int64Regs:$s, Int32Regs:$x, 6262 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6263 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6264 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6265 6266def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 6267 Int64Regs:$s, Int32Regs:$x, 6268 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6269 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6270 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6271 6272 6273 6274def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 6275 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6276 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6277 Int16Regs:$r)>; 6278 6279def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 6280 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6281 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6282 Int16Regs:$r)>; 6283 6284def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 6285 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6286 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6287 Int32Regs:$r)>; 6288 6289def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 6290 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6291 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6292 Int64Regs:$r)>; 6293 6294def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 6295 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6296 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6297 Int16Regs:$r, Int16Regs:$g)>; 6298 6299def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 6300 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6301 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6302 Int16Regs:$r, Int16Regs:$g)>; 6303 6304def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 6305 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6306 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6307 Int32Regs:$r, Int32Regs:$g)>; 6308 6309def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 6310 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6311 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6312 Int64Regs:$r, Int64Regs:$g)>; 6313 6314def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 6315 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6316 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6317 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6318 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6319 6320def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 6321 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6322 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6323 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6324 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6325 6326def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 6327 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6328 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6329 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6330 Int32Regs:$r, 
Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6331 6332 6333 6334def : Pat<(int_nvvm_sust_b_2d_i8_clamp 6335 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6336 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6337 Int16Regs:$r)>; 6338 6339def : Pat<(int_nvvm_sust_b_2d_i16_clamp 6340 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6341 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6342 Int16Regs:$r)>; 6343 6344def : Pat<(int_nvvm_sust_b_2d_i32_clamp 6345 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6346 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6347 Int32Regs:$r)>; 6348 6349def : Pat<(int_nvvm_sust_b_2d_i64_clamp 6350 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6351 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6352 Int64Regs:$r)>; 6353 6354def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 6355 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6356 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6357 Int16Regs:$r, Int16Regs:$g)>; 6358 6359def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 6360 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6361 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6362 Int16Regs:$r, Int16Regs:$g)>; 6363 6364def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 6365 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6366 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6367 Int32Regs:$r, Int32Regs:$g)>; 6368 6369def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 6370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6371 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6372 Int64Regs:$r, Int64Regs:$g)>; 6373 6374def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 6375 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6376 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6377 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, 6378 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6379 6380def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 6381 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6382 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6383 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6384 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6385 6386def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 6387 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6388 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6389 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6390 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6391 6392 6393 6394def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 6395 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6396 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, 6397 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6398 Int16Regs:$r)>; 6399 6400def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 6401 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6402 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, 6403 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6404 Int16Regs:$r)>; 6405 6406def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 6407 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6408 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, 6409 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6410 Int32Regs:$r)>; 6411 6412def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 6413 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6414 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, 6415 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6416 Int64Regs:$r)>; 6417 6418def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 6419 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6420 Int16Regs:$r, Int16Regs:$g), 6421 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, 6422 Int32Regs:$x, Int32Regs:$y, 6423 Int16Regs:$r, Int16Regs:$g)>; 6424 6425def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 6426 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6427 Int16Regs:$r, Int16Regs:$g), 6428 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, 6429 Int32Regs:$x, Int32Regs:$y, 6430 Int16Regs:$r, Int16Regs:$g)>; 6431 6432def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 6433 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6434 Int32Regs:$g), 6435 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6436 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6437 6438def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 6439 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6440 Int64Regs:$g), 6441 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, 6442 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6443 6444def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 6445 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6446 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6447 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, 6448 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6449 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6450 6451def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 6452 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6453 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6454 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, 6455 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6456 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6457 6458def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 6459 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6460 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6461 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6462 Int32Regs:$x, Int32Regs:$y, 6463 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6464 6465 6466 6467def : Pat<(int_nvvm_sust_b_3d_i8_clamp 6468 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6469 Int16Regs:$r), 6470 (SUST_B_3D_B8_CLAMP Int64Regs:$s, 6471 Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z,
      Int16Regs:$r)>;

// Surface-store selection patterns.
//
// Every record below is an anonymous `def : Pat` that rewrites one
// int_nvvm_sust_* intrinsic into the SUST_* instruction of the same geometry,
// element type and out-of-range mode, passing the operands through unchanged
// and in the same order.  The i8 element variants carry their values in
// Int16Regs, exactly like the i16 variants.
//
// NOTE(review): $s looks like the surface handle, $l the array layer,
// $x/$y/$z the coordinates and $r/$g/$b/$a the stored components -- confirm
// against the intrinsic definitions.

// sust.b, 3d, .clamp variant (remaining element types)
def : Pat<
  (int_nvvm_sust_b_3d_i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_clamp
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_CLAMP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// sust.b, .trap variant

// 1d
def : Pat<
  (int_nvvm_sust_b_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 1d array
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d
def : Pat<
  (int_nvvm_sust_b_2d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d array
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 3d
def : Pat<
  (int_nvvm_sust_b_3d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// sust.b, .zero variant

// 1d
def : Pat<
  (int_nvvm_sust_b_1d_i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_i64_zero
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 1d array
def : Pat<
  (int_nvvm_sust_b_1d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_B_1D_ARRAY_B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_B_1D_ARRAY_B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r),
  (SUST_B_1D_ARRAY_B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_1D_ARRAY_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_1D_ARRAY_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_1D_ARRAY_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_1D_ARRAY_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_1d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_1D_ARRAY_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d
def : Pat<
  (int_nvvm_sust_b_2d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d array
def : Pat<
  (int_nvvm_sust_b_2d_array_i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_B_2D_ARRAY_B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_B_2D_ARRAY_B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r),
  (SUST_B_2D_ARRAY_B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_2D_ARRAY_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_2D_ARRAY_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v2i64_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_2D_ARRAY_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i8_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i16_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_2D_ARRAY_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_2d_array_v4i32_zero
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_2D_ARRAY_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 3d
def : Pat<
  (int_nvvm_sust_b_3d_i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_B_3D_B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_B_3D_B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r),
  (SUST_B_3D_B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_B_3D_V2B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_B_3D_V2B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v2i64_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g),
  (SUST_B_3D_V2B64_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int64Regs:$r, Int64Regs:$g)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i8_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B8_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i16_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_B_3D_V4B16_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_b_3d_v4i32_zero
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_B_3D_V4B32_ZERO
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// sust.p, .trap variant
// Unlike the sust.b groups above, no i64 / v2i64 element types appear here.

// 1d
def : Pat<
  (int_nvvm_sust_p_1d_i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_P_1D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 1d array
def : Pat<
  (int_nvvm_sust_p_1d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r),
  (SUST_P_1D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r),
  (SUST_P_1D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_1D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_1D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_1D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_1d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_1D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d
def : Pat<
  (int_nvvm_sust_p_2d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_P_2D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_P_2D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_P_2D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v2i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_v4i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 2d array
def : Pat<
  (int_nvvm_sust_p_2d_array_i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_P_2D_ARRAY_B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r),
  (SUST_P_2D_ARRAY_B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r),
  (SUST_P_2D_ARRAY_B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g),
  (SUST_P_2D_ARRAY_V2B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v2i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g),
  (SUST_P_2D_ARRAY_V2B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i8_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B8_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i16_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
  (SUST_P_2D_ARRAY_V4B16_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<
  (int_nvvm_sust_p_2d_array_v4i32_trap
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
  (SUST_P_2D_ARRAY_V4B32_TRAP
      Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
      Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// 3d
def : Pat<
  (int_nvvm_sust_p_3d_i8_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_P_3D_B8_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_3d_i16_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r),
  (SUST_P_3D_B16_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int16Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_3d_i32_trap
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r),
  (SUST_P_3D_B32_TRAP
      Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
      Int32Regs:$r)>;

def : Pat<
  (int_nvvm_sust_p_3d_v2i8_trap
      Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7434 Int16Regs:$r, Int16Regs:$g), 7435 (SUST_P_3D_V2B8_TRAP Int64Regs:$s, 7436 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7437 Int16Regs:$r, Int16Regs:$g)>; 7438 7439def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 7440 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7441 Int16Regs:$r, Int16Regs:$g), 7442 (SUST_P_3D_V2B16_TRAP Int64Regs:$s, 7443 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7444 Int16Regs:$r, Int16Regs:$g)>; 7445 7446def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 7447 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7448 Int32Regs:$r, Int32Regs:$g), 7449 (SUST_P_3D_V2B32_TRAP Int64Regs:$s, 7450 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7451 Int32Regs:$r, Int32Regs:$g)>; 7452 7453def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 7454 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7455 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7456 (SUST_P_3D_V4B8_TRAP Int64Regs:$s, 7457 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7458 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7459 7460def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 7461 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7462 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7463 (SUST_P_3D_V4B16_TRAP Int64Regs:$s, 7464 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7465 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7466 7467def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 7468 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7469 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7470 (SUST_P_3D_V4B32_TRAP Int64Regs:$s, 7471 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7472 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7473 7474//----------------------------------- 7475// Read Special Registers 7476//----------------------------------- 7477 7478class PTX_READ_SREG_R64<string regname, Intrinsic intop> 7479 : NVPTXInst<(outs Int64Regs:$d), (ins), 7480 !strconcat("mov.u64 \t$d, %", regname, ";"), 7481 [(set 
Int64Regs:$d, (intop))]>; 7482 7483class PTX_READ_SREG_R32<string regname, Intrinsic intop> 7484 : NVPTXInst<(outs Int32Regs:$d), (ins), 7485 !strconcat("mov.u32 \t$d, %", regname, ";"), 7486 [(set Int32Regs:$d, (intop))]>; 7487 7488// TODO Add read vector-version of special registers 7489 7490def INT_PTX_SREG_TID_X : 7491 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>; 7492def INT_PTX_SREG_TID_Y : 7493 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>; 7494def INT_PTX_SREG_TID_Z : 7495 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>; 7496def INT_PTX_SREG_TID_W : 7497 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>; 7498 7499def INT_PTX_SREG_NTID_X : 7500 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>; 7501def INT_PTX_SREG_NTID_Y : 7502 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>; 7503def INT_PTX_SREG_NTID_Z : 7504 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>; 7505def INT_PTX_SREG_NTID_W : 7506 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>; 7507 7508def INT_PTX_SREG_LANEID : 7509 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 7510def INT_PTX_SREG_WARPID : 7511 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 7512def INT_PTX_SREG_NWARPID : 7513 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 7514 7515def INT_PTX_SREG_CTAID_X : 7516 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>; 7517def INT_PTX_SREG_CTAID_Y : 7518 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>; 7519def INT_PTX_SREG_CTAID_Z : 7520 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>; 7521def INT_PTX_SREG_CTAID_W : 7522 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>; 7523 7524def INT_PTX_SREG_NCTAID_X : 7525 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>; 7526def INT_PTX_SREG_NCTAID_Y : 7527 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>; 7528def INT_PTX_SREG_NCTAID_Z : 7529 
PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>; 7530def INT_PTX_SREG_NCTAID_W : 7531 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>; 7532 7533def INT_PTX_SREG_SMID : 7534 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 7535def INT_PTX_SREG_NSMID : 7536 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 7537def INT_PTX_SREG_GRIDID : 7538 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 7539 7540def INT_PTX_SREG_LANEMASK_EQ : 7541 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 7542def INT_PTX_SREG_LANEMASK_LE : 7543 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 7544def INT_PTX_SREG_LANEMASK_LT : 7545 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 7546def INT_PTX_SREG_LANEMASK_GE : 7547 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 7548def INT_PTX_SREG_LANEMASK_GT : 7549 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 7550 7551def INT_PTX_SREG_CLOCK : 7552 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 7553def INT_PTX_SREG_CLOCK64 : 7554 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 7555 7556def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 7557def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 7558def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 7559def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 7560 7561// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 7562// handle the constant. 7563def INT_PTX_SREG_WARPSIZE : 7564 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 7565 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 7566 7567// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record (geometry, fragment name, PTX element type)
//        that this record extends.
//   op - name of the operation the fragment is used with. Instantiations in
//        this file pass "load", "store", "wmma.mma" or "mma". It is only
//        consulted when selecting Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the FIRST case whose condition is true,
  // so the order of the cases below is significant. Each (geometry, type, op)
  // combination must be listed only once; a later duplicate can never be
  // selected.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    // any type/op @ m16n8k8/m8n8k16
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // non-f64 @ m8n8k4 (the f64 variant is handled by an earlier case)
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // remaining mma-only geometries
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
// The operator `ins` is replaced with Intr, and the operand classes
// MEMri, MEMri64 and imem are replaced with ADDRri, ADDRri64 and ADDRvar
// respectively; everything else in Ins is left as is.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the PatFrag.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record the instruction implements.
//   _Args - list of `ins` dags that are concatenated to form the instruction's
//           input arguments.
// The operand lists and the asm string are placeholders here; the classes
// deriving from WMMA_INSTR in this file override them with `let`.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// More generally, the asm string below is assembled from Frag.frag, Layout,
// Frag.geom, Space and Frag.ptx_elt_type, optionally followed by the $ldm
// stride operand when WithStride is set.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                              [!con((ins SrcOp:$src),
                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" uses the generic matcher.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The trailing $ptx operand is referenced by the "${ptx:aligned}" part of
  // the asm string.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// More generally, the asm string below is assembled from Layout, Frag.geom,
// Space and Frag.ptx_elt_type, optionally followed by the $ldm stride operand
// when WithStride is set.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  // Any Space other than ".shared" or ".global" uses the generic matcher.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The trailing $ptx operand is referenced by the "${ptx:aligned}" part of
  // the asm string.
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants
// One anonymous instruction record is created for every combination of
// layout, stride, address space and address operand kind, for each fragment
// that NVVM_WMMA_LDST_SUPPORTED accepts. All of them are collected in the
// MMA_LDSTs list.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  // Resulting predicate list: everything fragment A requires, followed by
  // hasSM80/hasPTX71 when the b1 operation is ".and.popc".
  list<Predicate> ret = !listconcat(
      FragA.Predicates,
      !if(!eq(b1op, ".and.popc"), [hasSM80, hasPTX71], []));
  // The template arguments, kept as fields of the record.
  WMMA_REGINFO Frag = FragA;
  string Op = b1op;
}

// WMMA.MMA
// Fragments A, B and C are the inputs; fragment D supplies the outputs.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd,
               string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op,
                         FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix: ".<D>.<C>" when fragment A is f16, otherwise
  // ".<D>.<A>.<B>.<C>".
  string TypeList = !if(!eq(FragA.ptx_elt_type, "f16"),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragC.ptx_elt_type),
                        !strconcat(".", FragD.ptx_elt_type,
                                   ".", FragA.ptx_elt_type,
                                   ".", FragB.ptx_elt_type,
                                   ".", FragC.ptx_elt_type));
  let AsmString = !strconcat(
      "wmma.mma", b1op, ".sync", "${ptx:aligned}",
      ".", ALayout,
      ".", BLayout,
      ".", FragA.geom,
      // The rounding mode is printed only when one was given.
      !if(!eq(rnd, ""), "", !strconcat(".", rnd)),
      TypeList,
      !if(Satfinite, ".satfinite", ""),
      "\n\t\t", FragD.regstring,
      ",\n\t\t", FragA.regstring,
      ",\n\t\t", FragB.regstring,
      ",\n\t\t", FragC.regstring,
      ";");
}

// Instantiate a WMMA_MMA record for every combination of layouts, satfinite
// flag, rounding mode, fragment set and b1 operation that
// NVVM_WMMA_SUPPORTED accepts; the records are collected in WMMAs.
defset list<WMMA_INSTR> WMMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach rounding = ["", "rn", "rz", "rm", "rp"] in
  foreach frags = NVVM_MMA_OPS.all_wmma_ops in
  foreach bitop = NVVM_MMA_B1OPS<frags>.ret in
    if NVVM_WMMA_SUPPORTED<frags, a_layout, b_layout, sat, rounding>.ret then
      def : WMMA_MMA<WMMA_REGINFO<frags[0], "wmma.mma">,
                     WMMA_REGINFO<frags[1], "wmma.mma">,
                     WMMA_REGINFO<frags[2], "wmma.mma">,
                     WMMA_REGINFO<frags[3], "wmma.mma">,
                     a_layout, b_layout, sat, rounding, bitop>;
} // defset

// MMA
// Fragments A, B and C are the inputs; fragment D supplies the outputs.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op,
                        FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix is always ".<D>.<A>.<B>.<C>".
  string TypeList = !strconcat(".", FragD.ptx_elt_type,
                               ".", FragA.ptx_elt_type,
                               ".", FragB.ptx_elt_type,
                               ".", FragC.ptx_elt_type);
  let AsmString = !strconcat(
      "mma.sync.aligned.", FragA.geom,
      ".", ALayout,
      ".", BLayout,
      !if(Satfinite, ".satfinite", ""),
      TypeList,
      b1op,
      "\n\t\t", FragD.regstring,
      ",\n\t\t", FragA.regstring,
      ",\n\t\t", FragB.regstring,
      ",\n\t\t", FragC.regstring,
      ";");
}

// Instantiate an MMA record for every combination of layouts, satfinite flag,
// fragment set and b1 operation that NVVM_MMA_SUPPORTED accepts; the records
// are collected in MMAs.
defset list<WMMA_INSTR> MMAs = {
  foreach a_layout = ["row", "col"] in
  foreach b_layout = ["row", "col"] in
  foreach sat = [0, 1] in
  foreach frags = NVVM_MMA_OPS.all_mma_ops in
  foreach bitop = NVVM_MMA_B1OPS<frags>.ret in
    if NVVM_MMA_SUPPORTED<frags, a_layout, b_layout, sat>.ret then
      def : MMA<WMMA_REGINFO<frags[0], "mma">,
                WMMA_REGINFO<frags[1], "mma">,
                WMMA_REGINFO<frags[2], "mma">,
                WMMA_REGINFO<frags[3], "mma">,
                a_layout, b_layout, sat, bitop>;
} // defset


// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
class MMA_PAT<WMMA_INSTR wi>
  : Pat<wi.IntrinsicPattern,
        !con(!foreach(node, wi.Args, !subst(ins, wi, node)),
             (wi ptx.version))>,
    Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach inst = !listconcat(MMAs, WMMAs, MMA_LDSTs) in
  def : MMA_PAT<inst>;