//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate reads the fpimm's APFloat
// value as a float (or double) and compares it against the named constant
// with the C++ `==` operator.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;

// Code fragments that test the address space of a memory node by calling
// ChkMemSDNodeAddressSpace with the corresponding llvm::ADDRESS_SPACE_* value.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  // Rewrites an immediate into an i32 immediate holding
  // Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g.
//   RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
// The list is built recursively: the names for n-1 followed by prefix#(n-1);
// n == 0 yields the empty list.
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n,
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}

// Values of the `threadmask_imm` flag to iterate over when instantiating
// SHFL_INSTR: both 0 and 1 for the sync forms, only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {

// bar.sync on barrier 0, on a register barrier id, and on (id, count).
def INT_BARRIER0
    : NVPTXInst<(outs), (ins),
                "bar.sync \t0;",
                [(int_nvvm_barrier0)]>;
def INT_BARRIERN
    : NVPTXInst<(outs), (ins Int32Regs:$src1),
                "bar.sync \t$src1;",
                [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER
    : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                "bar.sync \t$src1, $src2;",
                [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// Barrier-with-reduction forms. The i32 input is first turned into a
// predicate with setp.ne.u32 inside a scoped {{ }} block; the and/or forms
// convert the resulting predicate back to an i32 with selp.u32.
def INT_BARRIER0_POPC
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
                # "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
                "{{ \n\t"
                # ".reg .pred \t%p1; \n\t"
                # ".reg .pred \t%p2; \n\t"
                # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
                # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
                # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
                # "}}",
                [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.sync \t$i;",
                [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync with an immediate (_I) or register (_R) operand.
def INT_BAR_WARP_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "bar.warp.sync \t$i;",
                [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync with an immediate (_I) or register (_R) operand.
def INT_BARRIER_SYNC_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync imm:$i)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "barrier.sync \t$i;",
                [(int_nvvm_barrier_sync Int32Regs:$i)]>,
      Requires<[hasPTX60, hasSM30]>;

// barrier.sync id, cnt -- the suffix letters give the operand kinds of
// ($id, $cnt): R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR
    : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI
    : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR
    : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II
    : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                "barrier.sync \t$id, $cnt;",
                [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
      Requires<[hasPTX60, hasSM30]>;

// One shfl[.sync] instruction variant.
//   sync           - emit the .sync form (adds a trailing $threadmask operand).
//   mode           - shuffle mode string pasted into the mnemonic and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; selects the register class of $src/$dst.
//   return_pred    - also produce an Int1Regs $pred result ("p" intrinsics).
//   offset_imm, mask_imm, threadmask_imm
//                  - whether the respective operand is an i32imm rather than
//                    an Int32Regs register.
// The operand lists, asm string and selection pattern are all computed from
// these flags; the placeholder template arguments below are overridden.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);

  // Name of the intrinsic record, e.g. int_nvvm_shfl_sync_<mode>_<reg>[p].
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);

  // (ins [threadmask,] src, offset, mask)
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
  );

  let OutOperandList = !if(return_pred,
                           (outs rc:$dst, Int1Regs:$pred),
                           (outs rc:$dst));

  let AsmString = "shfl."
                  # !if(sync, "sync.", "")
                  # mode # ".b32\t"
                  # "$dst"
                  # !if(return_pred, "|$pred", "") # ", "
                  # "$src, $offset, $mask"
                  # !if(sync, ", $threadmask", "")
                  # ";"
                  ;

  // Derive the pattern from the operand lists: `outs` becomes `set`, `ins`
  // becomes the intrinsic, and every i32imm operand becomes an `imm` leaf.
  let Pattern = [!con(
    !foreach(tmp, OutOperandList,
             !subst(outs, set,
                    !subst(i32imm, imm, tmp))),
    (set !foreach(tmp, InOperandList,
                  !subst(ins, Intr,
                         !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of SHFL_INSTR parameters.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL    : VOTE<Int1Regs,  "all.pred",   int_nvvm_vote_all>;
defm VOTE_ANY    : VOTE<Int1Regs,  "any.pred",   int_nvvm_vote_any>;
defm VOTE_UNI    : VOTE<Int1Regs,  "uni.pred",   int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL    : VOTE_SYNC<Int1Regs,  "all.pred",   int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY    : VOTE_SYNC<Int1Regs,  "any.pred",   int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI    : VOTE_SYNC<Int1Regs,  "uni.pred",   int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.<ptxtype>
// Four variants per type; the record suffixes are kept exactly as they were
// (ii, ir, ri, rr), each with the operand classes spelled out in its `ins`.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
                     "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32
    : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, i32imm>;
defm MATCH_ANY_SYNC_64
    : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, i64imm>;

// match.all.sync.<ptxtype> with an additional Int1Regs $pred result, printed
// as "$dest|$pred". Same four operand-kind variants as MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                           Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32
    : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, i32imm>;
defm MATCH_ALLP_SYNC_64
    : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
                  "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
                  [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD  : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN  : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX  : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND  : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR  : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR   : REDUX_SYNC<"or",  "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
// Operand-less instruction whose full asm text is StrOp.
class MEMBAR<string StrOp, Intrinsic IntOP>
    : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Async Copy Functions
//-----------------------------------

// In the multiclasses below, `_32` and `_64` differ only in the register
// class (Int32Regs vs. Int64Regs) used for the address operand(s).

multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                      # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "cp.async.mbarrier.arrive" # NoInc # AddrSpace
                      # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE
    : CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<"", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", "",
                               int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED
    : CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared",
                               int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

// cp.async.ca.shared.global [$dst], [$src], <cpsize>;
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16
    : CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;

// cp.async.cg.shared.global [$dst], [$src], <cpsize>;
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
                      "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
                      [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16
    : CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;

def CP_ASYNC_COMMIT_GROUP
    : NVPTXInst<(outs), (ins),
                "cp.async.commit_group;",
                [(int_nvvm_cp_async_commit_group)]>,
      Requires<[hasPTX70, hasSM80]>;

// $n is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP
    : NVPTXInst<(outs), (ins i32imm:$n),
                "cp.async.wait_group $n;",
                [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
      Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL
    : NVPTXInst<(outs), (ins),
                "cp.async.wait_all;",
                [(int_nvvm_cp_async_wait_all)]>,
      Requires<[hasPTX70, hasSM80]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------

// AddrSpace is pasted into the mnemonic ("" or ".shared" in the
// instantiations below).

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
                      [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int32Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
                      [(Intrin Int64Regs:$addr)]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                      # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      "mbarrier.arrive_drop" # AddrSpace
                      # ".b64 $state, [$addr];",
                      [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      "mbarrier.arrive_drop.noComplete" # AddrSpace
                      # ".b64 $state, [$addr], $count;",
                      [(set Int64Regs:$state,
                            (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                                      int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                      # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      "mbarrier.test_wait" # AddrSpace
                      # ".b64 $res, [$addr], $state;",
                      [(set Int1Regs:$res,
                            (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
            Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
      Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// fmin(1.0, fmax(0.0, a)) in all four operand orders, f32.
// Each is selected as CVT_f32_f32 with the CvtSAT mode operand.
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// The same four operand orders for f64, selected as CVT_f64_f64 with CvtSAT.
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
// One-source instruction: $dst = IntOP($src0), printed using OpcStr verbatim.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: $dst = IntOP($src0, $src1), printed using OpcStr
// verbatim.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source instruction: $dst = IntOP($src0, $src1, $src2), printed using
// OpcStr verbatim.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_PRMT
    : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_FMIN_F
    : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F
    : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F
    : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F
    : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D
    : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D
    : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;


//
// Multiplication
//

def INT_NVVM_MULHI_I
    : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI
    : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// 64-bit high-half multiplies.
def INT_NVVM_MULHI_LL
    : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL
    : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
               Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiplies, one per rounding-mode / ftz combination.
def INT_NVVM_MUL_RN_FTZ_F
    : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F
    : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F
    : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F
    : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F
    : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F
    : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F
    : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F
    : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiplies.
def INT_NVVM_MUL_RN_D
    : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D
    : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D
    : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D
    : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I
    : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI
    : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
               Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F
    : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F
    : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F
    : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F
    : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F
    : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F
    : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F
    : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F
    : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F
    : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D
    : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D
    : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D
    : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D
    : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I
    : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI
    : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
               Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//
// Selected as CVT_f32_f32 / CVT_f64_f64 with the CvtRMI* (floor) or
// CvtRPI* (ceil) mode operand.

def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F
    : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F
    : F_MATH_1<"abs.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D
    : F_MATH_1<"abs.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Round
//
// Selected as CVT_* with the CvtRNI* mode operand.

def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//
// Selected as CVT_* with the CvtRZI* mode operand.

def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//
// Selected as CVT_* with the CvtSAT* mode operand.

def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F
    : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F
    : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D
    : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F
    : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F
    : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D
    : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F
    : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F
    : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F
    : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F
    : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

def INT_NVVM_FMA_RN_FTZ_F
    : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F
    : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
    : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F
    : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
    : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F
    : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
    : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F
    : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
               Float32Regs, Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fma_rp_f>;

def INT_NVVM_FMA_RN_D
    : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D
    : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D
    : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D
    : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
               Float64Regs, Float64Regs, Float64Regs, Float64Regs,
               int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F
    : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
// rcp, f32: remaining rounding-mode variants, each with and without .ftz.
// Every record pairs the PTX asm string with the intrinsic named after it.
def INT_NVVM_RCP_RN_F
  : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
  : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
  : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
  : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
  : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
  : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
  : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// rcp, f64: one record per rounding mode.
def INT_NVVM_RCP_RN_D
  : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
  : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
  : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
  : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// rcp, f64: approximate form with .ftz.
def INT_NVVM_RCP_APPROX_FTZ_D
  : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// sqrt, f32: rn/rz/rm/rp with and without .ftz, then the two approx forms.
def INT_NVVM_SQRT_RN_FTZ_F
  : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
  : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
  : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
  : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
  : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
  : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
  : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
  : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
  : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
  : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// sqrt, f64: one record per rounding mode.
def INT_NVVM_SQRT_RN_D
  : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
  : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
  : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
  : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f is mapped onto one of the f32 sqrt records
// above. The patterns are listed from the most predicates to none; keep
// this order.
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
  Requires<[do_SQRTF32_RN]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
  Requires<[doF32FTZ]>;
def : Pat<
  (int_nvvm_sqrt_f Float32Regs:$a),
  (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

// rsqrt: approximate forms only (f32 with/without .ftz, and f64).
def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
  : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
             Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
  : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
             Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// add, f32: rn/rz/rm/rp, each with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F
  : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
  : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
  : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
  : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
  : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
  : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
  : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
  : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
             Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// add, f64: one record per rounding mode.
def INT_NVVM_ADD_RN_D
  : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
  : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
  : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
  : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
             Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// The conversion intrinsics get no instruction records of their own; each
// one is rewritten to an existing CVT_* instruction plus a Cvt* mode operand.

// d2f: f64 -> f32.
def : Pat<
  (int_nvvm_d2f_rn_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rn Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_d2f_rz_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_d2f_rm_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rm Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_d2f_rp_ftz Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<
  (int_nvvm_d2f_rp Float64Regs:$a),
  (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// d2i: f64 -> s32.
def : Pat<
  (int_nvvm_d2i_rn Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2i_rz Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2i_rm Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2i_rp Float64Regs:$a),
  (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// d2ui: f64 -> u32.
def : Pat<
  (int_nvvm_d2ui_rn Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_d2ui_rz Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_d2ui_rm Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_d2ui_rp Float64Regs:$a),
  (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// i2d: s32 -> f64.
def : Pat<
  (int_nvvm_i2d_rn Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2d_rz Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2d_rm Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2d_rp Int32Regs:$a),
  (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// ui2d: u32 -> f64 (rn, rz, rm).
def : Pat<
  (int_nvvm_ui2d_rn Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_ui2d_rz Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_ui2d_rm Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
// ui2d, rp: u32 -> f64.
def : Pat<
  (int_nvvm_ui2d_rp Int32Regs:$a),
  (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f2i: f32 -> s32, each rounding mode with and without FTZ.
def : Pat<
  (int_nvvm_f2i_rn_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rn Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2i_rz_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2i_rm_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rm Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2i_rp_ftz Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2i_rp Float32Regs:$a),
  (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f2ui: f32 -> u32, each rounding mode with and without FTZ.
def : Pat<
  (int_nvvm_f2ui_rn_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rn Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<
  (int_nvvm_f2ui_rz_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<
  (int_nvvm_f2ui_rm_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rm Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<
  (int_nvvm_f2ui_rp_ftz Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<
  (int_nvvm_f2ui_rp Float32Regs:$a),
  (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// i2f: s32 -> f32.
def : Pat<
  (int_nvvm_i2f_rn Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<
  (int_nvvm_i2f_rz Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<
  (int_nvvm_i2f_rm Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<
  (int_nvvm_i2f_rp Int32Regs:$a),
  (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

1040def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), 1041 (CVT_f32_u32 Int32Regs:$a, CvtRN)>; 1042def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), 1043 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; 1044def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), 1045 (CVT_f32_u32 Int32Regs:$a, CvtRM)>; 1046def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), 1047 (CVT_f32_u32 Int32Regs:$a, CvtRP)>; 1048 1049def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b), 1050 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; 1051def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b), 1052 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; 1053def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b), 1054 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; 1055def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b), 1056 (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; 1057 1058def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b), 1059 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>; 1060def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b), 1061 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>; 1062def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b), 1063 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>; 1064def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b), 1065 (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>; 1066 1067def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a), 1068 (CVT_bf16_f32 Float32Regs:$a, CvtRN)>; 1069def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a), 1070 (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>; 1071def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a), 1072 (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>; 1073def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a), 1074 (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>; 1075 1076def CVT_tf32_f32 : 1077 NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a), 1078 "cvt.rna.tf32.f32 \t$dest, $a;", 1079 [(set 
Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>; 1080 1081def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", 1082 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; 1083 1084def INT_NVVM_D2I_LO : F_MATH_1< 1085 !strconcat("{{\n\t", 1086 ".reg .b32 %temp; \n\t", 1087 "mov.b64 \t{$dst, %temp}, $src0;\n\t", 1088 "}}"), 1089 Int32Regs, Float64Regs, int_nvvm_d2i_lo>; 1090def INT_NVVM_D2I_HI : F_MATH_1< 1091 !strconcat("{{\n\t", 1092 ".reg .b32 %temp; \n\t", 1093 "mov.b64 \t{%temp, $dst}, $src0;\n\t", 1094 "}}"), 1095 Int32Regs, Float64Regs, int_nvvm_d2i_hi>; 1096 1097def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), 1098 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 1099def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), 1100 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; 1101def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), 1102 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 1103def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), 1104 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; 1105def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), 1106 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 1107def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), 1108 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; 1109def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), 1110 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 1111def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), 1112 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; 1113 1114def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), 1115 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 1116def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), 1117 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; 1118def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), 1119 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 1120def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), 1121 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; 1122def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), 1123 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 1124def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), 1125 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; 1126def : 
Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), 1127 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 1128def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), 1129 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; 1130 1131def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), 1132 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; 1133def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), 1134 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; 1135def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), 1136 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; 1137def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), 1138 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; 1139 1140def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), 1141 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; 1142def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), 1143 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; 1144def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), 1145 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; 1146def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), 1147 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; 1148 1149def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), 1150 (CVT_f32_s64 Int64Regs:$a, CvtRN)>; 1151def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), 1152 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; 1153def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), 1154 (CVT_f32_s64 Int64Regs:$a, CvtRM)>; 1155def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), 1156 (CVT_f32_s64 Int64Regs:$a, CvtRP)>; 1157 1158def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), 1159 (CVT_f32_u64 Int64Regs:$a, CvtRN)>; 1160def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), 1161 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; 1162def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), 1163 (CVT_f32_u64 Int64Regs:$a, CvtRM)>; 1164def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), 1165 (CVT_f32_u64 Int64Regs:$a, CvtRP)>; 1166 1167def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), 1168 (CVT_f64_s64 Int64Regs:$a, CvtRN)>; 1169def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), 1170 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; 1171def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), 1172 (CVT_f64_s64 Int64Regs:$a, CvtRM)>; 1173def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), 1174 (CVT_f64_s64 
Int64Regs:$a, CvtRP)>; 1175 1176def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), 1177 (CVT_f64_u64 Int64Regs:$a, CvtRN)>; 1178def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), 1179 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; 1180def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), 1181 (CVT_f64_u64 Int64Regs:$a, CvtRM)>; 1182def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), 1183 (CVT_f64_u64 Int64Regs:$a, CvtRP)>; 1184 1185 1186def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), 1187 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>; 1188def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), 1189 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>; 1190 1191// 1192// Bitcast 1193// 1194 1195def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, 1196 Float32Regs, int_nvvm_bitcast_f2i>; 1197def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, 1198 Int32Regs, int_nvvm_bitcast_i2f>; 1199 1200def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, 1201 Int64Regs, int_nvvm_bitcast_ll2d>; 1202def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, 1203 Float64Regs, int_nvvm_bitcast_d2ll>; 1204 1205// 1206// FNS 1207// 1208 1209class INT_FNS_MBO<dag ins, dag Operands> 1210 : NVPTXInst<(outs Int32Regs:$dst), ins, 1211 "fns.b32 \t$dst, $mask, $base, $offset;", 1212 [(set Int32Regs:$dst, Operands )]>, 1213 Requires<[hasPTX60, hasSM30]>; 1214 1215def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), 1216 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; 1217def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), 1218 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; 1219def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), 1220 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; 1221def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), 1222 (int_nvvm_fns 
Int32Regs:$mask, imm:$base, imm:$offset)>; 1223def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), 1224 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; 1225def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), 1226 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; 1227def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), 1228 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; 1229def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), 1230 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; 1231 1232//----------------------------------- 1233// Atomic Functions 1234//----------------------------------- 1235 1236class ATOMIC_GLOBAL_CHK <dag ops, dag frag> 1237 : PatFrag<ops, frag, AS_match.global>; 1238class ATOMIC_SHARED_CHK <dag ops, dag frag> 1239 : PatFrag<ops, frag, AS_match.shared>; 1240class ATOMIC_GENERIC_CHK <dag ops, dag frag> 1241 : PatFrag<ops, frag, AS_match.generic>; 1242 1243multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 1244 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 1245 Operand IMMType, SDNode IMM, list<Predicate> Pred> { 1246 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 1247 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), 1248 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 1249 Requires<Pred>; 1250 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), 1251 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), 1252 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>, 1253 Requires<Pred>; 1254} 1255multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 1256 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, 1257 list<Predicate> Pred = []> { 1258 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 
1259 IntOp, IMMType, IMM, Pred>; 1260 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1261 IntOp, IMMType, IMM, Pred>; 1262} 1263 1264// has 2 operands, neg the second one 1265multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 1266 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 1267 list<Predicate> Pred> { 1268 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 1269 !strconcat( 1270 "{{ \n\t", 1271 ".reg \t.s", TypeStr, " temp; \n\t", 1272 "neg.s", TypeStr, " \ttemp, $b; \n\t", 1273 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", 1274 "}}"), 1275 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 1276 Requires<Pred>; 1277} 1278multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr, 1279 string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> { 1280 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 1281 IntOp, Pred> ; 1282 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1283 IntOp, Pred> ; 1284} 1285 1286// has 3 operands 1287multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 1288 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 1289 Operand IMMType, list<Predicate> Pred> { 1290 def reg : NVPTXInst<(outs regclass:$dst), 1291 (ins ptrclass:$addr, regclass:$b, regclass:$c), 1292 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1293 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>, 1294 Requires<Pred>; 1295 1296 def imm1 : NVPTXInst<(outs regclass:$dst), 1297 (ins ptrclass:$addr, IMMType:$b, regclass:$c), 1298 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1299 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>, 1300 Requires<Pred>; 1301 1302 def imm2 : NVPTXInst<(outs regclass:$dst), 1303 (ins ptrclass:$addr, regclass:$b, 
IMMType:$c), 1304 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), 1305 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>, 1306 Requires<Pred>; 1307 1308 def imm3 : NVPTXInst<(outs regclass:$dst), 1309 (ins ptrclass:$addr, IMMType:$b, IMMType:$c), 1310 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1311 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>, 1312 Requires<Pred>; 1313} 1314multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 1315 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> { 1316 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 1317 IntOp, IMMType, Pred>; 1318 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1319 IntOp, IMMType, Pred>; 1320} 1321 1322// atom_add 1323 1324def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1325 (atomic_load_add_32 node:$a, node:$b)>; 1326def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1327 (atomic_load_add_32 node:$a, node:$b)>; 1328def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1329 (atomic_load_add_32 node:$a, node:$b)>; 1330def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1331 (atomic_load_add_64 node:$a, node:$b)>; 1332def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1333 (atomic_load_add_64 node:$a, node:$b)>; 1334def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1335 (atomic_load_add_64 node:$a, node:$b)>; 1336def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1337 (atomic_load_fadd node:$a, node:$b)>; 1338def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1339 (atomic_load_fadd node:$a, node:$b)>; 1340def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1341 (atomic_load_fadd node:$a, node:$b)>; 1342 1343defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, 
".global", ".u32", ".add", 1344 atomic_load_add_32_g, i32imm, imm>; 1345defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add", 1346 atomic_load_add_32_s, i32imm, imm>; 1347defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add", 1348 atomic_load_add_32_gen, i32imm, imm>; 1349defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1350 ".add", atomic_load_add_32_gen, i32imm, imm>; 1351 1352defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", 1353 atomic_load_add_64_g, i64imm, imm>; 1354defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add", 1355 atomic_load_add_64_s, i64imm, imm>; 1356defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add", 1357 atomic_load_add_64_gen, i64imm, imm>; 1358defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1359 ".add", atomic_load_add_64_gen, i64imm, imm>; 1360 1361defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add", 1362 atomic_load_add_g, f32imm, fpimm>; 1363defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", 1364 atomic_load_add_s, f32imm, fpimm>; 1365defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add", 1366 atomic_load_add_gen, f32imm, fpimm>; 1367 1368defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add", 1369 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>; 1370defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add", 1371 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>; 1372defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add", 1373 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>; 1374 1375// atom_sub 1376 1377def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1378 (atomic_load_sub_32 node:$a, node:$b)>; 1379def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1380 (atomic_load_sub_32 
node:$a, node:$b)>; 1381def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1382 (atomic_load_sub_32 node:$a, node:$b)>; 1383def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1384 (atomic_load_sub_64 node:$a, node:$b)>; 1385def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1386 (atomic_load_sub_64 node:$a, node:$b)>; 1387def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1388 (atomic_load_sub_64 node:$a, node:$b)>; 1389 1390defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", 1391 atomic_load_sub_32_g>; 1392defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", 1393 atomic_load_sub_64_g>; 1394defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", 1395 atomic_load_sub_32_gen>; 1396defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", 1397 ".add", atomic_load_sub_32_gen>; 1398defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", 1399 atomic_load_sub_32_s>; 1400defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", 1401 atomic_load_sub_64_s>; 1402defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", 1403 atomic_load_sub_64_gen>; 1404defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", 1405 ".add", atomic_load_sub_64_gen>; 1406 1407// atom_swap 1408 1409def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1410 (atomic_swap_32 node:$a, node:$b)>; 1411def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1412 (atomic_swap_32 node:$a, node:$b)>; 1413def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1414 (atomic_swap_32 node:$a, node:$b)>; 1415def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1416 (atomic_swap_64 node:$a, node:$b)>; 1417def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1418 (atomic_swap_64 node:$a, node:$b)>; 1419def 
atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1420 (atomic_swap_64 node:$a, node:$b)>; 1421 1422defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch", 1423 atomic_swap_32_g, i32imm, imm>; 1424defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch", 1425 atomic_swap_32_s, i32imm, imm>; 1426defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch", 1427 atomic_swap_32_gen, i32imm, imm>; 1428defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1429 ".exch", atomic_swap_32_gen, i32imm, imm>; 1430defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch", 1431 atomic_swap_64_g, i64imm, imm>; 1432defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch", 1433 atomic_swap_64_s, i64imm, imm>; 1434defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch", 1435 atomic_swap_64_gen, i64imm, imm>; 1436defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1437 ".exch", atomic_swap_64_gen, i64imm, imm>; 1438 1439// atom_max 1440 1441def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) 1442 , (atomic_load_max_32 node:$a, node:$b)>; 1443def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1444 (atomic_load_max_32 node:$a, node:$b)>; 1445def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1446 (atomic_load_max_32 node:$a, node:$b)>; 1447def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) 1448 , (atomic_load_max_64 node:$a, node:$b)>; 1449def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1450 (atomic_load_max_64 node:$a, node:$b)>; 1451def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1452 (atomic_load_max_64 node:$a, node:$b)>; 1453def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1454 (atomic_load_umax_32 node:$a, node:$b)>; 1455def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops 
node:$a, node:$b), 1456 (atomic_load_umax_32 node:$a, node:$b)>; 1457def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1458 (atomic_load_umax_32 node:$a, node:$b)>; 1459def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1460 (atomic_load_umax_64 node:$a, node:$b)>; 1461def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1462 (atomic_load_umax_64 node:$a, node:$b)>; 1463def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1464 (atomic_load_umax_64 node:$a, node:$b)>; 1465 1466defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", 1467 ".max", atomic_load_max_32_g, i32imm, imm>; 1468defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", 1469 ".max", atomic_load_max_32_s, i32imm, imm>; 1470defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max", 1471 atomic_load_max_32_gen, i32imm, imm>; 1472defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1473 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>; 1474defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", 1475 ".max", atomic_load_max_64_g, i64imm, imm>; 1476defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", 1477 ".max", atomic_load_max_64_s, i64imm, imm>; 1478defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max", 1479 atomic_load_max_64_gen, i64imm, imm>; 1480defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1481 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>; 1482defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1483 ".max", atomic_load_umax_32_g, i32imm, imm>; 1484defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", 1485 ".max", atomic_load_umax_32_s, i32imm, imm>; 1486defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max", 1487 atomic_load_umax_32_gen, i32imm, imm>; 1488defm 
INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1489 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>; 1490defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1491 ".max", atomic_load_umax_64_g, i64imm, imm>; 1492defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", 1493 ".max", atomic_load_umax_64_s, i64imm, imm>; 1494defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max", 1495 atomic_load_umax_64_gen, i64imm, imm>; 1496defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1497 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>; 1498 1499// atom_min 1500 1501def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1502 (atomic_load_min_32 node:$a, node:$b)>; 1503def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1504 (atomic_load_min_32 node:$a, node:$b)>; 1505def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1506 (atomic_load_min_32 node:$a, node:$b)>; 1507def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1508 (atomic_load_min_64 node:$a, node:$b)>; 1509def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1510 (atomic_load_min_64 node:$a, node:$b)>; 1511def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1512 (atomic_load_min_64 node:$a, node:$b)>; 1513def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1514 (atomic_load_umin_32 node:$a, node:$b)>; 1515def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1516 (atomic_load_umin_32 node:$a, node:$b)>; 1517def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1518 (atomic_load_umin_32 node:$a, node:$b)>; 1519def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1520 (atomic_load_umin_64 node:$a, node:$b)>; 1521def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1522 (atomic_load_umin_64 node:$a, node:$b)>; 1523def 
atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1524 (atomic_load_umin_64 node:$a, node:$b)>; 1525 1526defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", 1527 ".min", atomic_load_min_32_g, i32imm, imm>; 1528defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", 1529 ".min", atomic_load_min_32_s, i32imm, imm>; 1530defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min", 1531 atomic_load_min_32_gen, i32imm, imm>; 1532defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1533 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>; 1534defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", 1535 ".min", atomic_load_min_64_g, i64imm, imm>; 1536defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", 1537 ".min", atomic_load_min_64_s, i64imm, imm>; 1538defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min", 1539 atomic_load_min_64_gen, i64imm, imm>; 1540defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1541 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>; 1542defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1543 ".min", atomic_load_umin_32_g, i32imm, imm>; 1544defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", 1545 ".min", atomic_load_umin_32_s, i32imm, imm>; 1546defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min", 1547 atomic_load_umin_32_gen, i32imm, imm>; 1548defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1549 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>; 1550defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1551 ".min", atomic_load_umin_64_g, i64imm, imm>; 1552defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", 1553 ".min", atomic_load_umin_64_s, i64imm, imm>; 1554defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : 
F_ATOMIC_2<Int64Regs, "", ".u64", ".min", 1555 atomic_load_umin_64_gen, i64imm, imm>; 1556defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1557 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>; 1558 1559// atom_inc atom_dec 1560 1561def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1562 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1563def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1564 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1565def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1566 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1567def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1568 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1569def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1570 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1571def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1572 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1573 1574defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc", 1575 atomic_load_inc_32_g, i32imm, imm>; 1576defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc", 1577 atomic_load_inc_32_s, i32imm, imm>; 1578defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc", 1579 atomic_load_inc_32_gen, i32imm, imm>; 1580defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1581 ".inc", atomic_load_inc_32_gen, i32imm, imm>; 1582defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec", 1583 atomic_load_dec_32_g, i32imm, imm>; 1584defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec", 1585 atomic_load_dec_32_s, i32imm, imm>; 1586defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", 1587 atomic_load_dec_32_gen, i32imm, imm>; 1588defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1589 ".dec", 
atomic_load_dec_32_gen, i32imm, imm>; 1590 1591// atom_and 1592 1593def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1594 (atomic_load_and_32 node:$a, node:$b)>; 1595def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1596 (atomic_load_and_32 node:$a, node:$b)>; 1597def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1598 (atomic_load_and_32 node:$a, node:$b)>; 1599def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1600 (atomic_load_and_64 node:$a, node:$b)>; 1601def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1602 (atomic_load_and_64 node:$a, node:$b)>; 1603def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1604 (atomic_load_and_64 node:$a, node:$b)>; 1605 1606defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", 1607 atomic_load_and_32_g, i32imm, imm>; 1608defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", 1609 atomic_load_and_32_s, i32imm, imm>; 1610defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", 1611 atomic_load_and_32_gen, i32imm, imm>; 1612defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1613 ".and", atomic_load_and_32_gen, i32imm, imm>; 1614defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", 1615 atomic_load_and_64_g, i64imm, imm>; 1616defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", 1617 atomic_load_and_64_s, i64imm, imm>; 1618defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", 1619 atomic_load_and_64_gen, i64imm, imm>; 1620defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1621 ".and", atomic_load_and_64_gen, i64imm, imm>; 1622 1623// atom_or 1624 1625def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1626 (atomic_load_or_32 node:$a, node:$b)>; 1627def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1628 
(atomic_load_or_32 node:$a, node:$b)>; 1629def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1630 (atomic_load_or_32 node:$a, node:$b)>; 1631def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1632 (atomic_load_or_64 node:$a, node:$b)>; 1633def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1634 (atomic_load_or_64 node:$a, node:$b)>; 1635def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1636 (atomic_load_or_64 node:$a, node:$b)>; 1637 1638defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", 1639 atomic_load_or_32_g, i32imm, imm>; 1640defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or", 1641 atomic_load_or_32_gen, i32imm, imm>; 1642defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1643 ".or", atomic_load_or_32_gen, i32imm, imm>; 1644defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", 1645 atomic_load_or_32_s, i32imm, imm>; 1646defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", 1647 atomic_load_or_64_g, i64imm, imm>; 1648defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", 1649 atomic_load_or_64_gen, i64imm, imm>; 1650defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1651 ".or", atomic_load_or_64_gen, i64imm, imm>; 1652defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", 1653 atomic_load_or_64_s, i64imm, imm>; 1654 1655// atom_xor 1656 1657def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1658 (atomic_load_xor_32 node:$a, node:$b)>; 1659def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1660 (atomic_load_xor_32 node:$a, node:$b)>; 1661def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1662 (atomic_load_xor_32 node:$a, node:$b)>; 1663def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1664 (atomic_load_xor_64 node:$a, node:$b)>; 1665def 
atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1666 (atomic_load_xor_64 node:$a, node:$b)>; 1667def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1668 (atomic_load_xor_64 node:$a, node:$b)>; 1669 1670defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", 1671 atomic_load_xor_32_g, i32imm, imm>; 1672defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", 1673 atomic_load_xor_32_s, i32imm, imm>; 1674defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", 1675 atomic_load_xor_32_gen, i32imm, imm>; 1676defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1677 ".xor", atomic_load_xor_32_gen, i32imm, imm>; 1678defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", 1679 atomic_load_xor_64_g, i64imm, imm>; 1680defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", 1681 atomic_load_xor_64_s, i64imm, imm>; 1682defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", 1683 atomic_load_xor_64_gen, i64imm, imm>; 1684defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1685 ".xor", atomic_load_xor_64_gen, i64imm, imm>; 1686 1687// atom_cas 1688 1689def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1690 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1691def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1692 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1693def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1694 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1695def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1696 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1697def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1698 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1699def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops 
node:$a, node:$b, node:$c), 1700 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1701 1702defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1703 atomic_cmp_swap_32_g, i32imm>; 1704defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1705 atomic_cmp_swap_32_s, i32imm>; 1706defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1707 atomic_cmp_swap_32_gen, i32imm>; 1708defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1709 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1710defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1711 atomic_cmp_swap_64_g, i64imm>; 1712defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1713 atomic_cmp_swap_64_s, i64imm>; 1714defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1715 atomic_cmp_swap_64_gen, i64imm>; 1716defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1717 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1718 1719// Support for scoped atomic operations. Matches 1720// int_nvvm_atomic_{op}_{space}_{type}_{scope} 1721// and converts it into the appropriate instruction. 1722// NOTE: not all possible combinations are implemented 1723// 'space' is limited to generic as it's the only one needed to support CUDA. 1724// 'scope' = 'gpu' is default and is handled by regular atomic instructions. 1725class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds, 1726 dag ins, dag Operands> 1727 : NVPTXInst<(outs regclass:$result), ins, 1728 AsmStr, 1729 [(set regclass:$result, Operands)]>, 1730 Requires<Preds>; 1731 1732// Define instruction variants for all addressing modes. 
// Two-operand scoped atomics. Every variant shares AsmStr and Preds; the
// variants differ only in the register class used for the address $src
// (Int32Regs or Int64Regs) and in whether $b is a register or an immediate.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register forms of $b carry AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int32Regs:$src, regclass:$b),
        (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int64Regs:$src, regclass:$b),
        (Intr Int64Regs:$src, regclass:$b)>;
  }

  // Immediate forms of $b. TableGen cannot infer argument types from an
  // Intrinsic (though it can from an Instruction), so the immediate is given
  // a specific type through an explicit cast to ImmTy.
  def : ATOM23_impl<
      AsmStr, regclass, Preds,
      (ins Int32Regs:$src, ImmType:$b),
      (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<
      AsmStr, regclass, Preds,
      (ins Int64Regs:$src, ImmType:$b),
      (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand scoped atomics. Same scheme as the two-operand multiclass,
// extended to every register/immediate permutation of $b and $c.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // $b and $c both in registers.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int32Regs:$src, regclass:$b, regclass:$c),
        (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int64Regs:$src, regclass:$b, regclass:$c),
        (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }

  // Exactly one of $b / $c is an immediate.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int32Regs:$src, ImmType:$b, regclass:$c),
        (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int64Regs:$src, ImmType:$b, regclass:$c),
        (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int32Regs:$src, regclass:$b, ImmType:$c),
        (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<
        AsmStr, regclass, Preds,
        (ins Int64Regs:$src, regclass:$b, ImmType:$c),
        (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }

  // $b and $c both immediates.
  def : ATOM23_impl<
      AsmStr, regclass, Preds,
      (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
      (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<
      AsmStr, regclass, Preds,
      (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
      (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs the intrinsic name and the instruction asm string.
//   asm:       atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// The asm string omits the space when SpaceStr is "gen" and the scope when
// ScopeStr is "gpu"; the intrinsic name omits the scope suffix only when
// ScopeStr is empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand counterpart: identical naming scheme, with $c appended to the
// asm operand list.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the "gen" (generic) address space is instantiated.
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the "gen" (generic) address space is instantiated.
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for the different scopes of a two-operand atomic op.
// Every scoped variant additionally requires hasAtomScope.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // The .gpu scope is the default and is currently covered by the existing
  // atomics that do not specify a scope explicitly.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// Constructs variants for the different scopes of a three-operand atomic op.
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics. They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  // The f64 form is additionally gated on hasAtomAddF64.
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  // The 64-bit form is additionally gated on hasAtomBitwise64.
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  // The 64-bit forms are additionally gated on hasAtomMinMax64.
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// Instantiate the scoped atomics, one defm per PTX atom operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One instruction per address operand kind. TyStr supplies the type suffix and
// the operand list that follow "ldu.global.". The pattern list is empty, so
// these records carry no selection pattern of their own.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg : NVPTXInst<(outs regclass:$result),
                       (ins Int32Regs:$src),
                       "ldu.global." # TyStr,
                       []>,
             Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result),
                         (ins Int64Regs:$src),
                         "ldu.global." # TyStr,
                         []>,
               Requires<[hasLDU]>;
  def avar : NVPTXInst<(outs regclass:$result),
                       (ins imemAny:$src),
                       "ldu.global." # TyStr,
                       []>,
             Requires<[hasLDU]>;
  def ari : NVPTXInst<(outs regclass:$result),
                      (ins MEMri:$src),
                      "ldu.global." # TyStr,
                      []>,
            Requires<[hasLDU]>;
  def ari64 : NVPTXInst<(outs regclass:$result),
                        (ins MEMri64:$src),
                        "ldu.global." # TyStr,
                        []>,
              Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8
    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
    : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
    : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
    : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
    : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
    : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu: each vector element is written to its own result
// register. Two-element form, one instruction per address operand kind. The
// pattern list is empty, and no Requires<> predicate is attached here.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}

// Four-element form of the elementized vector ldu.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ldu.global." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ldu.global." # TyStr, []>;
}

// Two-element instantiations.
defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE : VLDU_G_ELE_V2<
    "v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE : VLDU_G_ELE_V2<
    "v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// Four-element instantiations.
defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<
    "v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<
    "v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc: one instruction per address operand kind. TyStr
// supplies the type suffix and the operand list that follow "ld.global.nc.".
// The pattern list is empty, so these records carry no selection pattern.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg : NVPTXInst<(outs regclass:$result),
                       (ins Int32Regs:$src),
                       "ld.global.nc." # TyStr,
                       []>,
             Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result),
                         (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr,
                         []>,
               Requires<[hasLDG]>;
  def avar : NVPTXInst<(outs regclass:$result),
                       (ins imemAny:$src),
                       "ld.global.nc." # TyStr,
                       []>,
             Requires<[hasLDG]>;
  def ari : NVPTXInst<(outs regclass:$result),
                      (ins MEMri:$src),
                      "ld.global.nc." # TyStr,
                      []>,
            Requires<[hasLDG]>;
  def ari64 : NVPTXInst<(outs regclass:$result),
                        (ins MEMri64:$src),
                        "ld.global.nc." # TyStr,
                        []>,
              Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg: each vector element is written to its own result
// register. Two-element form, one instruction per address operand kind. The
// pattern list is empty, and no Requires<> predicate is attached here.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}

// Four-element form of the elementized vector ldg.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>;
  def _ari64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                               regclass:$dst3, regclass:$dst4),
                         (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>;
  def _avar : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                              regclass:$dst3, regclass:$dst4),
                        (ins imemAny:$src),
                        "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Element-wise ld.global.nc instantiations.  The string argument is appended
// to "ld.global.nc." by VLDG_G_ELE_V2 / VLDG_G_ELE_V4; the register class is
// the class of every loaded element.  Note that the i8 variants load into
// Int16Regs.
defm INT_PTX_LDG_G_v2i8_ELE :
    VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE :
    VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE :
    VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE :
    VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE :
    VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE :
    VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE :
    VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE :
    VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE :
    VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE :
    VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE :
    VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE :
    VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE :
    VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE :
    VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


// Conversion from the state space named by Str to a generic address
// ("cvta.<Str>"), selected for intrinsic Intrin.
//   _yes      : 32-bit source, 32-bit result.
//   _yes_64   : 64-bit source, 64-bit result.
//   _yes_6432 : 32-bit source, 64-bit result; the source is first widened with
//               cvt.u64.u32 into a scratch register.  Requires useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int32Regs:$src),
      "{{ .reg .b64 %tmp;\n\t"
      # " cvt.u64.u32 \t%tmp, $src;\n\t"
      # " cvta." # Str # ".u64 \t$result, %tmp; }}",
      [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
    Requires<[useShortPtr]>;
}

// Conversion from a generic address to the state space named by Str
// ("cvta.to.<Str>"), selected for intrinsic Intrin.
//   _yes      : 32-bit source, 32-bit result.
//   _yes_64   : 64-bit source, 64-bit result.
//   _yes_3264 : 64-bit source, 32-bit result; the 64-bit cvta.to result is
//               narrowed with cvt.u32.u64.  Requires useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes : NVPTXInst<
      (outs Int32Regs:$result), (ins Int32Regs:$src),
      "cvta.to." # Str # ".u32 \t$result, $src;",
      [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64 : NVPTXInst<
      (outs Int64Regs:$result), (ins Int64Regs:$src),
      "cvta.to." # Str # ".u64 \t$result, $src;",
      [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264 : NVPTXInst<
      (outs Int32Regs:$result), (ins Int64Regs:$src),
      "{{ .reg .b64 %tmp;\n\t"
      # " cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
      # " cvt.u32.u64 \t$result, %tmp; }}",
      [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
    Requires<[useShortPtr]>;
}

defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param: emitted as a plain register move (32- and 64-bit).
def nvvm_ptr_gen_to_param : NVPTXInst<
    (outs Int32Regs:$result), (ins Int32Regs:$src),
    "mov.u32 \t$result, $src;",
    [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64 : NVPTXInst<
    (outs Int64Regs:$result), (ins Int64Regs:$src),
    "mov.u64 \t$result, $src;",
    [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsics: one register-to-register mov per value type.
def nvvm_move_i16 : NVPTXInst<
    (outs Int16Regs:$r), (ins Int16Regs:$s),
    "mov.b16 \t$r, $s;",
    [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.b32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.b64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float : NVPTXInst<
    (outs Float32Regs:$r), (ins Float32Regs:$s),
    "mov.f32 \t$r, $s;",
    [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double : NVPTXInst<
    (outs Float64Regs:$r), (ins Float64Regs:$s),
    "mov.f64 \t$r, $s;",
    [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32 : NVPTXInst<
    (outs Int32Regs:$r), (ins Int32Regs:$s),
    "mov.u32 \t$r, $s;",
    [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64 : NVPTXInst<
    (outs Int64Regs:$r), (ins Int64Regs:$s),
    "mov.u64 \t$r, $s;",
    [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this. There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// Moves an immediate/symbol operand into a 64-bit register; no selection
// pattern is attached.
def texsurf_handles : NVPTXInst<
    (outs Int64Regs:$result), (ins imem:$src),
    "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each of these emits only a PTX comment line; the operand is unused in the
// assembly string.
def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.warn()",
    [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<
    (outs), (ins Int32Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<
    (outs), (ins Int64Regs:$a),
    "// llvm.nvvm.compiler.error()",
    [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep: address-space membership test producing a predicate (Int1Regs).
// Only the .const forms carry a hasPTX31 requirement.

def ISSPACEP_CONST_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_CONST_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.const \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
  Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.global \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
def ISSPACEP_LOCAL_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.local \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
def ISSPACEP_SHARED_32 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int32Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64 : NVPTXInst<
    (outs Int1Regs:$d), (ins Int64Regs:$a),
    "isspacep.shared \t$d, $a;",
    [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;


// Special register reads: copy a SpecialRegs operand into a 32-bit register.
def MOV_SPECIAL : NVPTXInst<
    (outs Int32Regs:$d), (ins SpecialRegs:$r),
    "mov.b32 \t$d, $r;", []>;

// One pattern per environment register, ENVREG0 through ENVREG31.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;


// rotate builtin support

// 32-bit rotate when hasHWROT32 holds: a funnel shift with $src supplied as
// both data operands.
def ROTATE_B32_HW_IMM : NVPTXInst<
    (outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
       (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
  Requires<[hasHWROT32]> ;

def ROTATE_B32_HW_REG : NVPTXInst<
    (outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
    "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
    [(set Int32Regs:$dst,
       (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
  Requires<[hasHWROT32]> ;

// 32-bit rotate when noHWROT32 holds: defer to the software rotate
// instructions (defined outside this section).
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]> ;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]> ;

// Helpers for splitting a 64-bit register into two 32-bit registers and for
// packing two 32-bit registers back into one.  GET_LO_INT64 places $dst in the
// first slot of the unpacked pair, GET_HI_INT64 in the second; the other slot
// goes to a scratch register.
let hasSideEffects = false in {
  def GET_LO_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
      # "}}",
      []> ;

  def GET_HI_INT64 : NVPTXInst<
      (outs Int32Regs:$dst), (ins Int64Regs:$src),
      "{{\n\t"
      # ".reg .b32 %dummy;\n\t"
      # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
      # "}}",
      []> ;

  def PACK_TWO_INT32 : NVPTXInst<
      (outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
      "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
}

// Swapping the halves of a 64-bit value: repack with the extracted halves in
// the opposite order.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src))> ;

// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
let hasSideEffects = false in {
  // 32-bit funnel-shift instructions (left/right, wrap mode) with the shift
  // amount given either as an immediate or in a register.  They carry no
  // selection pattern of their own; the 64-bit rotate patterns below
  // instantiate them explicitly.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;
}

// HW version of rotate 64
// The 64-bit source is split into halves, each result half is produced by one
// funnel shift over the two source halves, and the halves are repacked.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
// ROT64imm_sw takes the rotate amount and its complement as two immediates.
// For a 64-bit rotate the complement has to be taken against 64, so the
// rotate-left pattern uses SUB_FRM_64 just like the rotate-right pattern;
// SUB_FRM_32 belongs to the 32-bit rotate (ROT32imm_sw) only.
// NOTE(review): SUB_FRM_32/SUB_FRM_64 and ROT64imm_sw are defined outside this
// section; presumably SUB_FRM_N yields N - amt -- confirm against their
// definitions.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
// also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles

// 1D texture fetch.  Produces four results ($r, $g, $b, $a) of class outtype
// from one coordinate $x of class intype.  texsamp supplies the leading
// texture ($t) and sampler ($s) operands and is prepended to the coordinate
// operand with !con.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
                 []>;

// Instantiates TEX_1D_base for every combination of register (R) or 64-bit
// immediate (I) texture and sampler operands: _RR, _RI, _IR, _II.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II :
TEX_1D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Conventions shared by every texture class below:
//   * four results $r, $g, $b, $a of class outtype;
//   * texsamp provides the texture ($t) and sampler ($s) operands and is
//     prepended to the remaining inputs with !con;
//   * each multiclass emits _RR/_RI/_IR/_II for register (R) or 64-bit
//     immediate (I) texture/sampler operands, in that order;
//   * array variants take the layer index $l as an Int32Regs operand.

// 1D fetch with an explicit level-of-detail operand $lod.
class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x\\}], $lod;",
          []>;

multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_LEVEL
    : TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_LEVEL
    : TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_LEVEL
    : TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D fetch with explicit gradients $gradx and $grady.
class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x\\}],"
               # " \\{$gradx\\}, \\{$grady\\};",
          []>;

multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_GRAD
    : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_S32_F32_GRAD
    : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_U32_F32_GRAD
    : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch: layer $l followed by coordinate $x.
class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x\\}];",
          []>;

multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32
    : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_F32_S32
    : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_S32
    : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_S32_F32
    : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_S32
    : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_1D_ARRAY_U32_F32
    : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch with an explicit level-of-detail operand.
class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x\\}], $lod;",
          []>;

multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL
    : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_LEVEL
    : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_LEVEL
    : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 1D array fetch with explicit gradients.
class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x,
                             intype:$gradx, intype:$grady)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x\\}],"
               # " \\{$gradx\\}, \\{$grady\\};",
          []>;

multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD
    : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_1D_ARRAY_S32_F32_GRAD
    : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_1D_ARRAY_U32_F32_GRAD
    : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch: coordinates $x, $y.
class TEX_2D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y\\}];",
          []>;

multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch with an explicit level-of-detail operand.
class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y\\}], $lod;",
          []>;

multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL
    : TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL
    : TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL
    : TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D fetch with explicit two-component gradients.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y,
                             intype:$gradx0, intype:$gradx1,
                             intype:$grady0, intype:$grady1)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y\\}],"
               # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
          []>;

multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD
    : TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_GRAD
    : TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD
    : TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D array fetch.  The coordinate vector is printed with four entries; the
// last coordinate ($y) is repeated in the fourth slot.
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x, $y, $y\\}];",
          []>;

multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32
    : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32
    : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32
    : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32
    : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32
    : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32
    : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D array fetch with an explicit level-of-detail operand.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                             intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
          []>;

multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL
    : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
    : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
    : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 2D array fetch with explicit two-component gradients.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                             intype:$gradx0, intype:$gradx1,
                             intype:$grady0, intype:$grady1)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x, $y, $y\\}],"
               # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
          []>;

multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD
    : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD
    : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD
    : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// 3D fetch.  The coordinate vector is printed with four entries; $z is
// repeated in the fourth slot.
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y, $z, $z\\}];",
          []>;

multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// 3D fetch with an explicit level-of-detail operand.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                             intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
          []>;

multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL
    : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL
    : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL
    : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// 3D fetch with explicit three-component gradients; each gradient vector is
// printed with four entries, repeating its last component.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                             intype:$gradx0, intype:$gradx1,
                             intype:$gradx2, intype:$grady0,
                             intype:$grady1, intype:$grady2)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y, $z, $z\\}],"
               # " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
               # " \\{$grady0, $grady1, $grady2, $grady2\\};",
          []>;

multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_GRAD
    : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD
    : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD
    : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Cube-map fetch: coordinates $x, $y, $z with $z repeated in the fourth slot.
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y, $z, $z\\}];",
          []>;

multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32
    : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32
    : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32
    : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Cube-map fetch with an explicit level-of-detail operand.
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                             intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
          []>;

multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32_LEVEL
    : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL
    : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL
    : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Cube-map array fetch: layer $l followed by $x, $y, $z.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                             intype:$z)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x, $y, $z\\}];",
          []>;

multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32
    : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32
    : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32
    : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Cube-map array fetch with an explicit level-of-detail operand.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                             intype:$z, intype:$lod)),
          inst # " \t\\{$r, $g, $b, $a\\},"
               # " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
          []>;

multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins
Int64Regs:$t, i64imm:$s)>; 3036 def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3037 (ins i64imm:$t, Int64Regs:$s)>; 3038 def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3039 (ins i64imm:$t, i64imm:$s)>; 3040} 3041 3042defm TEX_CUBE_ARRAY_F32_F32_LEVEL 3043 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 3044 Float32Regs, Float32Regs>; 3045defm TEX_CUBE_ARRAY_S32_F32_LEVEL 3046 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 3047 Int32Regs, Float32Regs>; 3048defm TEX_CUBE_ARRAY_U32_F32_LEVEL 3049 : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 3050 Int32Regs, Float32Regs>; 3051 3052class TLD4_2D_base<string inst, NVPTXRegClass outtype, 3053 NVPTXRegClass intype, dag texsamp> 3054 : NVPTXInst<(outs outtype:$v0, outtype:$v1, 3055 outtype:$v2, outtype:$v3), 3056 !con(texsamp, (ins intype:$x, intype:$y)), 3057 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];", 3058 []>; 3059 3060multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 3061 def _RR : TLD4_2D_base<inst, outtype, intype, 3062 (ins Int64Regs:$t, Int64Regs:$s)>; 3063 def _RI : TLD4_2D_base<inst, outtype, intype, 3064 (ins Int64Regs:$t, i64imm:$s)>; 3065 def _IR : TLD4_2D_base<inst, outtype, intype, 3066 (ins i64imm:$t, Int64Regs:$s)>; 3067 def _II : TLD4_2D_base<inst, outtype, intype, 3068 (ins i64imm:$t, i64imm:$s)>; 3069} 3070 3071defm TLD4_R_2D_F32_F32 3072 : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3073defm TLD4_G_2D_F32_F32 3074 : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3075defm TLD4_B_2D_F32_F32 3076 : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3077defm TLD4_A_2D_F32_F32 3078 : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3079 3080defm TLD4_R_2D_S32_F32 3081 : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3082defm TLD4_G_2D_S32_F32 3083 : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3084defm TLD4_B_2D_S32_F32 3085 : 
TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3086defm TLD4_A_2D_S32_F32 3087 : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3088 3089defm TLD4_R_2D_U32_F32 3090 : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3091defm TLD4_G_2D_U32_F32 3092 : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3093defm TLD4_B_2D_U32_F32 3094 : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3095defm TLD4_A_2D_U32_F32 3096 : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3097 3098} 3099 3100 3101// texmode_unified 3102let IsTex = true, IsTexModeUnified = true in { 3103// Texture fetch instructions using handles 3104 3105class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype, 3106 NVPTXRegClass intype, dag tex> 3107 : NVPTXInst<(outs outtype:$r, outtype:$g, 3108 outtype:$b, outtype:$a), 3109 !con(tex, (ins intype:$x)), 3110 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 3111 []>; 3112 3113multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype, 3114 NVPTXRegClass intype> { 3115 def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3116 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>; 3117} 3118 3119defm TEX_UNIFIED_1D_F32_S32 3120 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 3121defm TEX_UNIFIED_1D_F32_F32 3122 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3123defm TEX_UNIFIED_1D_S32_S32 3124 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 3125defm TEX_UNIFIED_1D_S32_F32 3126 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3127defm TEX_UNIFIED_1D_U32_S32 3128 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 3129defm TEX_UNIFIED_1D_U32_F32 3130 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3131 3132class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 3133 NVPTXRegClass intype, dag tex> 3134 : NVPTXInst<(outs outtype:$r, outtype:$g, 3135 outtype:$b, outtype:$a), 3136 
!con(tex, (ins intype:$x, intype:$lod)), 3137 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", 3138 []>; 3139 3140multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype, 3141 NVPTXRegClass intype> { 3142 def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3143 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3144} 3145 3146defm TEX_UNIFIED_1D_F32_F32_LEVEL 3147 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3148defm TEX_UNIFIED_1D_S32_F32_LEVEL 3149 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3150defm TEX_UNIFIED_1D_U32_F32_LEVEL 3151 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3152 3153class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype, 3154 NVPTXRegClass intype, dag tex> 3155 : NVPTXInst<(outs outtype:$r, outtype:$g, 3156 outtype:$b, outtype:$a), 3157 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), 3158 inst # " \t\\{$r, $g, $b, $a\\}," 3159 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3160 []>; 3161 3162multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype, 3163 NVPTXRegClass intype> { 3164 def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3165 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3166} 3167 3168defm TEX_UNIFIED_1D_F32_F32_GRAD 3169 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 3170defm TEX_UNIFIED_1D_S32_F32_GRAD 3171 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 3172defm TEX_UNIFIED_1D_U32_F32_GRAD 3173 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 3174 3175class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 3176 NVPTXRegClass intype, dag tex> 3177 : NVPTXInst<(outs outtype:$r, outtype:$g, 3178 outtype:$b, outtype:$a), 3179 !con(tex, (ins Int32Regs:$l, intype:$x)), 3180 inst # " \t\\{$r, $g, $b, 
$a\\}, [$t, \\{$l, $x\\}];", 3181 []>; 3182 3183multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype, 3184 NVPTXRegClass intype> { 3185 def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3186 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 3187} 3188 3189defm TEX_UNIFIED_1D_ARRAY_F32_S32 3190 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 3191defm TEX_UNIFIED_1D_ARRAY_F32_F32 3192 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 3193defm TEX_UNIFIED_1D_ARRAY_S32_S32 3194 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 3195defm TEX_UNIFIED_1D_ARRAY_S32_F32 3196 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 3197defm TEX_UNIFIED_1D_ARRAY_U32_S32 3198 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 3199defm TEX_UNIFIED_1D_ARRAY_U32_F32 3200 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 3201 3202class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3203 NVPTXRegClass intype, dag tex> 3204 : NVPTXInst<(outs outtype:$r, outtype:$g, 3205 outtype:$b, outtype:$a), 3206 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), 3207 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", 3208 []>; 3209 3210multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3211 NVPTXRegClass intype> { 3212 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, 3213 (ins Int64Regs:$t)>; 3214 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, 3215 (ins i64imm:$t)>; 3216} 3217 3218defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL 3219 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", 3220 Float32Regs, Float32Regs>; 3221defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL 3222 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", 3223 Int32Regs, Float32Regs>; 3224defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3225 : 
TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", 3226 Int32Regs, Float32Regs>; 3227 3228class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3229 NVPTXRegClass intype, dag tex> 3230 : NVPTXInst<(outs outtype:$r, outtype:$g, 3231 outtype:$b, outtype:$a), 3232 !con(tex, (ins Int32Regs:$l, intype:$x, 3233 intype:$gradx, intype:$grady)), 3234 inst # " \t\\{$r, $g, $b, $a\\}," 3235 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3236 []>; 3237 3238multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3239 NVPTXRegClass intype> { 3240 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, 3241 (ins Int64Regs:$t)>; 3242 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, 3243 (ins i64imm:$t)>; 3244} 3245 3246defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD 3247 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", 3248 Float32Regs, Float32Regs>; 3249defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3250 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", 3251 Int32Regs, Float32Regs>; 3252defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3253 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", 3254 Int32Regs, Float32Regs>; 3255 3256class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 3257 NVPTXRegClass intype, dag tex> 3258 : NVPTXInst<(outs outtype:$r, outtype:$g, 3259 outtype:$b, outtype:$a), 3260 !con(tex, (ins intype:$x, intype:$y)), 3261 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", 3262 []>; 3263 3264multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3265 NVPTXRegClass intype> { 3266 def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3267 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3268} 3269 3270defm TEX_UNIFIED_2D_F32_S32 3271 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; 3272defm TEX_UNIFIED_2D_F32_F32 3273 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3274defm TEX_UNIFIED_2D_S32_S32 3275 : 
TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; 3276defm TEX_UNIFIED_2D_S32_F32 3277 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3278defm TEX_UNIFIED_2D_U32_S32 3279 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; 3280defm TEX_UNIFIED_2D_U32_F32 3281 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3282 3283class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype, 3284 NVPTXRegClass intype, dag tex> 3285 : NVPTXInst<(outs outtype:$r, outtype:$g, 3286 outtype:$b, outtype:$a), 3287 !con(tex, (ins intype:$x, intype:$y, intype:$lod)), 3288 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", 3289 []>; 3290 3291multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype, 3292 NVPTXRegClass intype> { 3293 def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3294 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3295} 3296 3297defm TEX_UNIFIED_2D_F32_F32_LEVEL 3298 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3299defm TEX_UNIFIED_2D_S32_F32_LEVEL 3300 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3301defm TEX_UNIFIED_2D_U32_F32_LEVEL 3302 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3303 3304class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, 3305 NVPTXRegClass intype, dag tex> 3306 : NVPTXInst<(outs outtype:$r, outtype:$g, 3307 outtype:$b, outtype:$a), 3308 !con(tex, (ins intype:$x, intype:$y, 3309 intype:$gradx0, intype:$gradx1, 3310 intype:$grady0, intype:$grady1)), 3311 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," 3312 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 3313 []>; 3314multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype, 3315 NVPTXRegClass intype> { 3316 def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3317 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, 
(ins i64imm:$t)>; 3318} 3319 3320defm TEX_UNIFIED_2D_F32_F32_GRAD 3321 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3322defm TEX_UNIFIED_2D_S32_F32_GRAD 3323 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3324defm TEX_UNIFIED_2D_U32_F32_GRAD 3325 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3326 3327class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype, 3328 NVPTXRegClass intype, dag tex> 3329 : NVPTXInst<(outs outtype:$r, outtype:$g, 3330 outtype:$b, outtype:$a), 3331 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), 3332 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", 3333 []>; 3334multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype, 3335 NVPTXRegClass intype> { 3336 def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3337 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 3338} 3339 3340defm TEX_UNIFIED_2D_ARRAY_F32_S32 3341 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; 3342defm TEX_UNIFIED_2D_ARRAY_F32_F32 3343 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 3344defm TEX_UNIFIED_2D_ARRAY_S32_S32 3345 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; 3346defm TEX_UNIFIED_2D_ARRAY_S32_F32 3347 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 3348defm TEX_UNIFIED_2D_ARRAY_U32_S32 3349 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; 3350defm TEX_UNIFIED_2D_ARRAY_U32_F32 3351 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 3352 3353class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3354 NVPTXRegClass intype, dag tex> 3355 : NVPTXInst<(outs outtype:$r, outtype:$g, 3356 outtype:$b, outtype:$a), 3357 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 3358 intype:$lod)), 3359 inst # " \t\\{$r, $g, $b, $a\\}," 3360 " [$t, \\{$l, $x, $y, 
$y\\}], $lod;", 3361 []>; 3362multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3363 NVPTXRegClass intype> { 3364 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, 3365 (ins Int64Regs:$t)>; 3366 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, 3367 (ins i64imm:$t)>; 3368} 3369 3370defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL 3371 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", 3372 Float32Regs, Float32Regs>; 3373defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL 3374 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", 3375 Int32Regs, Float32Regs>; 3376defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 3377 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", 3378 Int32Regs, Float32Regs>; 3379 3380class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3381 NVPTXRegClass intype, dag tex> 3382 : NVPTXInst<(outs outtype:$r, outtype:$g, 3383 outtype:$b, outtype:$a), 3384 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 3385 intype:$gradx0, intype:$gradx1, 3386 intype:$grady0, intype:$grady1)), 3387 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," 3388 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 3389 []>; 3390multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3391 NVPTXRegClass intype> { 3392 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, 3393 (ins Int64Regs:$t)>; 3394 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, 3395 (ins i64imm:$t)>; 3396} 3397 3398defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD 3399 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", 3400 Float32Regs, Float32Regs>; 3401defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD 3402 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", 3403 Int32Regs, Float32Regs>; 3404defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 3405 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", 3406 Int32Regs, Float32Regs>; 3407 3408class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype, 3409 NVPTXRegClass 
intype, dag tex> 3410 : NVPTXInst<(outs outtype:$r, outtype:$g, 3411 outtype:$b, outtype:$a), 3412 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 3413 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 3414 []>; 3415multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype, 3416 NVPTXRegClass intype> { 3417 def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3418 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>; 3419} 3420 3421defm TEX_UNIFIED_3D_F32_S32 3422 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; 3423defm TEX_UNIFIED_3D_F32_F32 3424 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3425defm TEX_UNIFIED_3D_S32_S32 3426 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; 3427defm TEX_UNIFIED_3D_S32_F32 3428 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3429defm TEX_UNIFIED_3D_U32_S32 3430 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; 3431defm TEX_UNIFIED_3D_U32_F32 3432 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3433 3434class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype, 3435 NVPTXRegClass intype, dag tex> 3436 : NVPTXInst<(outs outtype:$r, outtype:$g, 3437 outtype:$b, outtype:$a), 3438 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3439 inst # " \t\\{$r, $g, $b, $a\\}," 3440 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3441 []>; 3442multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, 3443 NVPTXRegClass intype> { 3444 def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3445 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3446} 3447 3448defm TEX_UNIFIED_3D_F32_F32_LEVEL 3449 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3450defm TEX_UNIFIED_3D_S32_F32_LEVEL 3451 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3452defm TEX_UNIFIED_3D_U32_F32_LEVEL 3453 : 
TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3454 3455class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, 3456 NVPTXRegClass intype, dag tex> 3457 : NVPTXInst<(outs outtype:$r, outtype:$g, 3458 outtype:$b, outtype:$a), 3459 !con(tex, (ins intype:$x, intype:$y, intype:$z, 3460 intype:$gradx0, intype:$gradx1, 3461 intype:$gradx2, intype:$grady0, 3462 intype:$grady1, intype:$grady2)), 3463 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 3464 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 3465 " \\{$grady0, $grady1, $grady2, $grady2\\};", 3466 []>; 3467multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, 3468 NVPTXRegClass intype> { 3469 def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3470 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3471} 3472 3473defm TEX_UNIFIED_3D_F32_F32_GRAD 3474 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; 3475defm TEX_UNIFIED_3D_S32_F32_GRAD 3476 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; 3477defm TEX_UNIFIED_3D_U32_F32_GRAD 3478 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; 3479 3480class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype, 3481 NVPTXRegClass intype, dag tex> 3482 : NVPTXInst<(outs outtype:$r, outtype:$g, 3483 outtype:$b, outtype:$a), 3484 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 3485 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 3486 []>; 3487multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype, 3488 NVPTXRegClass intype> { 3489 def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3490 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>; 3491} 3492 3493defm TEX_UNIFIED_CUBE_F32_F32 3494 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; 3495defm TEX_UNIFIED_CUBE_S32_F32 3496 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", 
Int32Regs, Float32Regs>; 3497defm TEX_UNIFIED_CUBE_U32_F32 3498 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3499 3500class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 3501 NVPTXRegClass intype, dag tex> 3502 : NVPTXInst<(outs outtype:$r, outtype:$g, 3503 outtype:$b, outtype:$a), 3504 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3505 inst # " \t\\{$r, $g, $b, $a\\}," 3506 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3507 []>; 3508multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 3509 NVPTXRegClass intype> { 3510 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3511 (ins Int64Regs:$t)>; 3512 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3513 (ins i64imm:$t)>; 3514} 3515 3516defm TEX_UNIFIED_CUBE_F32_F32_LEVEL 3517 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", 3518 Float32Regs, Float32Regs>; 3519defm TEX_UNIFIED_CUBE_S32_F32_LEVEL 3520 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", 3521 Int32Regs, Float32Regs>; 3522defm TEX_UNIFIED_CUBE_U32_F32_LEVEL 3523 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", 3524 Int32Regs, Float32Regs>; 3525 3526class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, 3527 NVPTXRegClass intype, dag tex> 3528 : NVPTXInst<(outs outtype:$r, outtype:$g, 3529 outtype:$b, outtype:$a), 3530 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), 3531 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", 3532 []>; 3533multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, 3534 NVPTXRegClass intype> { 3535 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3536 (ins Int64Regs:$t)>; 3537 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3538 (ins i64imm:$t)>; 3539} 3540 3541defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 3542 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; 3543defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 3544 : 
TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; 3545defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 3546 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; 3547 3548class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3549 NVPTXRegClass intype, dag tex> 3550 : NVPTXInst<(outs outtype:$r, outtype:$g, 3551 outtype:$b, outtype:$a), 3552 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, 3553 intype:$lod)), 3554 inst # " \t\\{$r, $g, $b, $a\\}," 3555 " [$t, \\{$l, $x, $y, $z\\}], $lod;", 3556 []>; 3557multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3558 NVPTXRegClass intype> { 3559 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3560 (ins Int64Regs:$t)>; 3561 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3562 (ins i64imm:$t)>; 3563} 3564 3565defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3566 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 3567 Float32Regs, Float32Regs>; 3568defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3569 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 3570 Int32Regs, Float32Regs>; 3571defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3572 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 3573 Int32Regs, Float32Regs>; 3574 3575class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 3576 NVPTXRegClass intype, dag tex> 3577 : NVPTXInst<(outs outtype:$v0, outtype:$v1, 3578 outtype:$v2, outtype:$v3), 3579 !con(tex, (ins intype:$x, intype:$y)), 3580 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", 3581 []>; 3582multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3583 NVPTXRegClass intype> { 3584 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3585 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3586} 3587 3588defm TLD4_UNIFIED_R_2D_F32_F32 3589 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3590defm 
TLD4_UNIFIED_G_2D_F32_F32 3591 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3592defm TLD4_UNIFIED_B_2D_F32_F32 3593 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3594defm TLD4_UNIFIED_A_2D_F32_F32 3595 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3596 3597defm TLD4_UNIFIED_R_2D_S32_F32 3598 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3599defm TLD4_UNIFIED_G_2D_S32_F32 3600 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3601defm TLD4_UNIFIED_B_2D_S32_F32 3602 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3603defm TLD4_UNIFIED_A_2D_S32_F32 3604 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3605 3606defm TLD4_UNIFIED_R_2D_U32_F32 3607 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3608defm TLD4_UNIFIED_G_2D_U32_F32 3609 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3610defm TLD4_UNIFIED_B_2D_U32_F32 3611 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3612defm TLD4_UNIFIED_A_2D_U32_F32 3613 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3614 3615} 3616 3617 3618 3619//=== Surface load instructions 3620 3621let IsSuld = true in { 3622 3623class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf> 3624 : NVPTXInst<(outs outtype:$r), 3625 !con(surf, (ins Int32Regs:$x)), 3626 inst # " \\{$r\\}, [$s, \\{$x\\}];", 3627 []>; 3628multiclass SULD_1D<string inst, NVPTXRegClass outtype> { 3629 def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>; 3630 def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>; 3631} 3632 3633defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>; 3634defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>; 3635defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>; 3636defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>; 3637 3638defm SULD_1D_I8_TRAP : 
SULD_1D<"suld.b.1d.b8.trap", Int16Regs>; 3639defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>; 3640defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>; 3641defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>; 3642 3643defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>; 3644defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; 3645defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; 3646defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; 3647 3648class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> 3649 : NVPTXInst<(outs outtype:$r), 3650 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), 3651 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", 3652 []>; 3653multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> { 3654 def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; 3655 def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; 3656} 3657 3658defm SULD_1D_ARRAY_I8_CLAMP 3659 : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>; 3660defm SULD_1D_ARRAY_I16_CLAMP 3661 : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>; 3662defm SULD_1D_ARRAY_I32_CLAMP 3663 : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>; 3664defm SULD_1D_ARRAY_I64_CLAMP 3665 : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>; 3666 3667defm SULD_1D_ARRAY_I8_TRAP 3668 : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>; 3669defm SULD_1D_ARRAY_I16_TRAP 3670 : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>; 3671defm SULD_1D_ARRAY_I32_TRAP 3672 : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>; 3673defm SULD_1D_ARRAY_I64_TRAP 3674 : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>; 3675 3676defm SULD_1D_ARRAY_I8_ZERO 3677 : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>; 3678defm SULD_1D_ARRAY_I16_ZERO 3679 : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>; 3680defm SULD_1D_ARRAY_I32_ZERO 3681 : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>; 3682defm SULD_1D_ARRAY_I64_ZERO 3683 : 
SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; 3684 3685class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf> 3686 : NVPTXInst<(outs outtype:$r), 3687 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), 3688 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", 3689 []>; 3690multiclass SULD_2D<string inst, NVPTXRegClass outtype> { 3691 def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>; 3692 def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>; 3693} 3694 3695defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>; 3696defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>; 3697defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>; 3698defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>; 3699 3700defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>; 3701defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>; 3702defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>; 3703defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>; 3704 3705defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>; 3706defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; 3707defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; 3708defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; 3709 3710class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> 3711 : NVPTXInst<(outs outtype:$r), 3712 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), 3713 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3714 []>; 3715multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> { 3716 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; 3717 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; 3718} 3719 3720defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; 3721defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; 3722defm SULD_2D_ARRAY_I32_CLAMP : 
SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; 3723defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; 3724 3725defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; 3726defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; 3727defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; 3728defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; 3729 3730defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; 3731defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; 3732defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; 3733defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; 3734 3735class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf> 3736 : NVPTXInst<(outs outtype:$r), 3737 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), 3738 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3739 []>; 3740multiclass SULD_3D<string inst, NVPTXRegClass outtype> { 3741 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>; 3742 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>; 3743} 3744 3745defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; 3746defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>; 3747defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>; 3748defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>; 3749 3750defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>; 3751defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>; 3752defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>; 3753defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>; 3754 3755defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>; 3756defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>; 3757defm SULD_3D_I32_ZERO : 
SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; 3758defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; 3759} 3760 3761let IsSuld = 2 in { 3762 3763class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3764 : NVPTXInst<(outs outtype:$r, outtype:$g), 3765 !con(surf, (ins Int32Regs:$x)), 3766 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", 3767 []>; 3768multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> { 3769 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3770 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>; 3771} 3772 3773defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; 3774defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; 3775defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; 3776defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; 3777 3778defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; 3779defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; 3780defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; 3781defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; 3782 3783defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; 3784defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; 3785defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; 3786defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; 3787 3788class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3789 : NVPTXInst<(outs outtype:$r, outtype:$g), 3790 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), 3791 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3792 []>; 3793multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 3794 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3795 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 3796} 3797 3798defm 
SULD_1D_ARRAY_V2I8_CLAMP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

// "a1d" two-result loads, ".trap" forms.
defm SULD_1D_ARRAY_V2I8_TRAP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

// "a1d" two-result loads, ".zero" forms.
defm SULD_1D_ARRAY_V2I8_ZERO :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_ZERO :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO :
    SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

// Template for a two-result ($r, $g) "2d" surface load with two Int32Regs
// operands $x and $y after the surface operand $s supplied through `surf`.
// No selection pattern is attached.
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// ".clamp" forms.
defm SULD_2D_V2I8_CLAMP :
    SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP :
    SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP :
    SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP :
    SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP :
SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; 3846defm SULD_2D_V2I16_TRAP 3847 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; 3848defm SULD_2D_V2I32_TRAP 3849 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; 3850defm SULD_2D_V2I64_TRAP 3851 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; 3852 3853defm SULD_2D_V2I8_ZERO 3854 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; 3855defm SULD_2D_V2I16_ZERO 3856 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; 3857defm SULD_2D_V2I32_ZERO 3858 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; 3859defm SULD_2D_V2I64_ZERO 3860 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; 3861 3862class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3863 : NVPTXInst<(outs outtype:$r, outtype:$g), 3864 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), 3865 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", 3866 []>; 3867multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 3868 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3869 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 3870} 3871 3872defm SULD_2D_ARRAY_V2I8_CLAMP 3873 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; 3874defm SULD_2D_ARRAY_V2I16_CLAMP 3875 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; 3876defm SULD_2D_ARRAY_V2I32_CLAMP 3877 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; 3878defm SULD_2D_ARRAY_V2I64_CLAMP 3879 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; 3880 3881defm SULD_2D_ARRAY_V2I8_TRAP 3882 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; 3883defm SULD_2D_ARRAY_V2I16_TRAP 3884 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; 3885defm SULD_2D_ARRAY_V2I32_TRAP 3886 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; 3887defm SULD_2D_ARRAY_V2I64_TRAP 3888 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; 3889 3890defm SULD_2D_ARRAY_V2I8_ZERO 3891 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; 
defm SULD_2D_ARRAY_V2I16_ZERO :
    SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO :
    SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO :
    SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// Template for a two-result ($r, $g) "3d" surface load with three Int32Regs
// operands $x, $y and $z after the surface operand $s.
// NOTE(review): $z is printed twice, so the bracketed list has four entries;
// presumably the instruction wants a four-element coordinate list - confirm
// against the PTX ISA before touching the string.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// ".clamp" forms.
defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

// ".trap" forms.
defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

// ".zero" forms.
defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

} // closes a `let` block opened earlier in the file

// Every record defined inside this block gets IsSuld = 3.
let IsSuld = 3 in {

// Template for a four-result ($r, $g, $b, $a) "1d" surface load with one
// Int32Regs operand $x after the surface operand $s supplied through `surf`.
// No selection pattern is attached.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
        []>;
// Expands SULD_1D_V4_base twice per instantiation:
//   <name>_R - $s is an Int64Regs operand.
//   <name>_I - $s is an i64imm operand.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// Only b8/b16/b32 widths are instantiated for the four-result forms.
// ".clamp" forms.
defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

// ".trap" forms.
defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

// ".zero" forms.
defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// Template for a four-result ($r, $g, $b, $a) "a1d" surface load with two
// Int32Regs operands $l and $x after the surface operand $s.
// No selection pattern is attached.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// ".clamp" forms.
defm SULD_1D_ARRAY_V4I8_CLAMP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

// ".trap" forms.
defm SULD_1D_ARRAY_V4I8_TRAP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO :
    SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// Template for a four-result ($r, $g, $b, $a) "2d" surface load with two
// Int32Regs operands $x and $y after the surface operand $s.
// No selection pattern is attached.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// ".clamp" forms.
defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

// ".trap" forms.
defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

// ".zero" forms.
defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// Template for a four-result "a2d" surface load with three Int32Regs
// operands $l, $x and $y after the surface operand $s.
// NOTE(review): $y is printed twice, so the bracketed list has four entries;
// presumably the instruction wants a four-element coordinate list - confirm
// against the PTX ISA before touching the string.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
        !strconcat(inst,
                   " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm
SULD_2D_ARRAY_V4I8_CLAMP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

// "a2d" four-result loads, ".trap" forms.
defm SULD_2D_ARRAY_V4I8_TRAP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

// "a2d" four-result loads, ".zero" forms.
defm SULD_2D_ARRAY_V4I8_ZERO :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO :
    SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// Template for a four-result "3d" surface load with three Int32Regs
// operands $x, $y and $z after the surface operand $s.
// NOTE(review): $z is printed twice, so the bracketed list has four entries;
// presumably the instruction wants a four-element coordinate list - confirm
// against the PTX ISA before touching the string.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf> :
    NVPTXInst<
        (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
        !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
        !strconcat(inst,
                   " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// ".clamp" forms.
defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

// ".trap" forms.
defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

// ".zero" forms (the b32 form follows).
defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

} // closes a `let` block opened earlier in the file

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query is defined twice: the _R record takes the handle $a as an
// Int64Regs operand, the _I record takes it as an i64imm. All of them write
// an Int32Regs result $d and carry no selection pattern of their own.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_ORDER_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.width.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.height.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.height.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_I :
    NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_R :
    NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_I :
    NVPTXInst<(outs Int32Regs:$d),
(ins i64imm:$a), 4117 "txq.num_samples.b32 \t$d, [$a];", 4118 []>; 4119def TXQ_NUM_MIPMAP_LEVELS_R 4120 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4121 "txq.num_mipmap_levels.b32 \t$d, [$a];", 4122 []>; 4123def TXQ_NUM_MIPMAP_LEVELS_I 4124 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4125 "txq.num_mipmap_levels.b32 \t$d, [$a];", 4126 []>; 4127} 4128 4129def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), 4130 (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>; 4131def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), 4132 (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; 4133def : Pat<(int_nvvm_txq_width Int64Regs:$a), 4134 (TXQ_WIDTH_R Int64Regs:$a)>; 4135def : Pat<(int_nvvm_txq_height Int64Regs:$a), 4136 (TXQ_HEIGHT_R Int64Regs:$a)>; 4137def : Pat<(int_nvvm_txq_depth Int64Regs:$a), 4138 (TXQ_DEPTH_R Int64Regs:$a)>; 4139def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), 4140 (TXQ_ARRAY_SIZE_R Int64Regs:$a)>; 4141def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), 4142 (TXQ_NUM_SAMPLES_R Int64Regs:$a)>; 4143def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), 4144 (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>; 4145 4146 4147//----------------------------------- 4148// Surface Query Intrinsics 4149//----------------------------------- 4150 4151let IsSurfTexQuery = true in { 4152def SUQ_CHANNEL_ORDER_R 4153 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4154 "suq.channel_order.b32 \t$d, [$a];", 4155 []>; 4156def SUQ_CHANNEL_ORDER_I 4157 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4158 "suq.channel_order.b32 \t$d, [$a];", 4159 []>; 4160def SUQ_CHANNEL_DATA_TYPE_R 4161 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4162 "suq.channel_data_type.b32 \t$d, [$a];", 4163 []>; 4164def SUQ_CHANNEL_DATA_TYPE_I 4165 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4166 "suq.channel_data_type.b32 \t$d, [$a];", 4167 []>; 4168def SUQ_WIDTH_R 4169 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4170 "suq.width.b32 \t$d, [$a];", 4171 []>; 4172def SUQ_WIDTH_I 4173 : 
NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4174 "suq.width.b32 \t$d, [$a];", 4175 []>; 4176def SUQ_HEIGHT_R 4177 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4178 "suq.height.b32 \t$d, [$a];", 4179 []>; 4180def SUQ_HEIGHT_I 4181 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4182 "suq.height.b32 \t$d, [$a];", 4183 []>; 4184def SUQ_DEPTH_R 4185 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4186 "suq.depth.b32 \t$d, [$a];", 4187 []>; 4188def SUQ_DEPTH_I 4189 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4190 "suq.depth.b32 \t$d, [$a];", 4191 []>; 4192def SUQ_ARRAY_SIZE_R 4193 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4194 "suq.array_size.b32 \t$d, [$a];", 4195 []>; 4196def SUQ_ARRAY_SIZE_I 4197 : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a), 4198 "suq.array_size.b32 \t$d, [$a];", 4199 []>; 4200} 4201 4202def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), 4203 (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>; 4204def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), 4205 (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>; 4206def : Pat<(int_nvvm_suq_width Int64Regs:$a), 4207 (SUQ_WIDTH_R Int64Regs:$a)>; 4208def : Pat<(int_nvvm_suq_height Int64Regs:$a), 4209 (SUQ_HEIGHT_R Int64Regs:$a)>; 4210def : Pat<(int_nvvm_suq_depth Int64Regs:$a), 4211 (SUQ_DEPTH_R Int64Regs:$a)>; 4212def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), 4213 (SUQ_ARRAY_SIZE_R Int64Regs:$a)>; 4214 4215 4216//===- Handle Query -------------------------------------------------------===// 4217 4218// TODO: These intrinsics are not yet finalized, pending PTX ISA design work 4219def ISTYPEP_SAMPLER 4220 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4221 "istypep.samplerref \t$d, $a;", 4222 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; 4223def ISTYPEP_SURFACE 4224 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4225 "istypep.surfref \t$d, $a;", 4226 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; 4227def ISTYPEP_TEXTURE 4228 : NVPTXInst<(outs 
Int1Regs:$d), (ins Int64Regs:$a), 4229 "istypep.texref \t$d, $a;", 4230 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; 4231 4232//===- Surface Stores -----------------------------------------------------===// 4233 4234let IsSust = true in { 4235 4236class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf> 4237 : NVPTXInst<(outs), 4238 !con(surf, (ins Int32Regs:$x, intype:$r)), 4239 inst # " \t[$s, \\{$x\\}], \\{$r\\};", 4240 []>; 4241multiclass SUST_1D<string inst, NVPTXRegClass intype> { 4242 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>; 4243 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>; 4244} 4245 4246defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; 4247defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; 4248defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; 4249defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; 4250 4251defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; 4252defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; 4253defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; 4254defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; 4255 4256defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; 4257defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; 4258defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; 4259defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; 4260 4261defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; 4262defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; 4263defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; 4264 4265class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4266 : NVPTXInst<(outs), 4267 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), 4268 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", 4269 []>; 4270multiclass SUST_1D_V2<string 
inst, NVPTXRegClass intype> { 4271 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4272 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; 4273} 4274 4275defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; 4276defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; 4277defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; 4278defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; 4279 4280defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; 4281defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; 4282defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; 4283defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; 4284 4285defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; 4286defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; 4287defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; 4288defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; 4289 4290defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; 4291defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; 4292defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; 4293 4294class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4295 : NVPTXInst<(outs), 4296 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, 4297 intype:$b, intype:$a)), 4298 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4299 []>; 4300multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { 4301 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4302 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; 4303} 4304 4305defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; 4306defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; 
defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;

// "sust.b" four-value "1d" stores, ".trap" forms.
defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;

// "sust.b" four-value "1d" stores, ".zero" forms.
defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;

// "sust.p" four-value "1d" stores: only ".trap" is instantiated.
defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;

// Template for an "a1d" surface store of one value $r with two Int32Regs
// operands $idx and $x after the surface operand $s supplied through `surf`.
// The record has no outputs and no selection pattern.
class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> :
    NVPTXInst<
        (outs),
        !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
        !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

// "sust.b" ".clamp" forms.
defm SUST_B_1D_ARRAY_B8_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_CLAMP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;

// "sust.b" ".trap" forms.
defm SUST_B_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_TRAP :
    SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;

defm
SUST_B_1D_ARRAY_B8_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B16_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_B32_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_B64_ZERO :
    SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;

// "sust.p" single-value "a1d" stores: only ".trap" is instantiated.
defm SUST_P_1D_ARRAY_B8_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B16_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_B32_TRAP :
    SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;

// Template for an "a1d" surface store of two values ($r, $g) with two
// Int32Regs operands $idx and $x after the surface operand $s.
// The record has no outputs and no selection pattern.
class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> :
    NVPTXInst<
        (outs),
        !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r, intype:$g)),
        !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

// "sust.b" ".clamp" forms.
defm SUST_B_1D_ARRAY_V2B8_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_CLAMP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;

// "sust.b" ".trap" forms.
defm SUST_B_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_TRAP :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;

// "sust.b" ".zero" forms (the remaining widths follow).
defm SUST_B_1D_ARRAY_V2B8_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
defm
SUST_B_1D_ARRAY_V2B16_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V2B32_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_ARRAY_V2B64_ZERO :
    SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;

// "sust.p" two-value "a1d" stores: only ".trap" is instantiated.
defm SUST_P_1D_ARRAY_V2B8_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B16_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V2B32_TRAP :
    SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;

// Template for an "a1d" surface store of four values ($r, $g, $b, $a) with
// two Int32Regs operands $idx and $x after the surface operand $s.
// The record has no outputs and no selection pattern.
class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> :
    NVPTXInst<
        (outs),
        !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
                        intype:$r, intype:$g, intype:$b, intype:$a)),
        !strconcat(inst, " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};"),
        []>;

// <name>_R takes $s as Int64Regs, <name>_I takes $s as i64imm.
multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

// "sust.b" ".clamp" forms.
defm SUST_B_1D_ARRAY_V4B8_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_CLAMP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;

// "sust.b" ".trap" forms.
defm SUST_B_1D_ARRAY_V4B8_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_TRAP :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;

// "sust.b" ".zero" forms.
defm SUST_B_1D_ARRAY_V4B8_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B16_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
defm SUST_B_1D_ARRAY_V4B32_ZERO :
    SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;

defm
SUST_P_1D_ARRAY_V4B8_TRAP 4443 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; 4444defm SUST_P_1D_ARRAY_V4B16_TRAP 4445 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; 4446defm SUST_P_1D_ARRAY_V4B32_TRAP 4447 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; 4448 4449class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf> 4450 : NVPTXInst<(outs), 4451 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), 4452 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", 4453 []>; 4454multiclass SUST_2D<string inst, NVPTXRegClass intype> { 4455 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>; 4456 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>; 4457} 4458 4459defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; 4460defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; 4461defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; 4462defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; 4463 4464defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; 4465defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; 4466defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; 4467defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; 4468 4469defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; 4470defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; 4471defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; 4472defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; 4473 4474defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; 4475defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; 4476defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; 4477 4478class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4479 : NVPTXInst<(outs), 4480 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 4481 intype:$r, intype:$g)), 4482 inst # " \t[$s, \\{$x, 
$y\\}], \\{$r, $g\\};", 4483 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate) for the two-component 2D store. */ 4484multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> { 4485 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4486 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>; 4487} 4488 4489defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; 4490defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; 4491defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; 4492defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; 4493 4494defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; 4495defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; 4496defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; 4497defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; 4498 4499defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; 4500defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; 4501defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; 4502defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; 4503 4504defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; 4505defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; 4506defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; 4507 /* Four-component store to a 2D surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the coordinates $x and $y, and the value registers $r, $g, $b, $a of class `intype`. The pattern list is empty. */ 4508class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4509 : NVPTXInst<(outs), 4510 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 4511 intype:$r, intype:$g, intype:$b, intype:$a)), 4512 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", 4513 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4514multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> { 4515 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4516 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>; 4517} 4518 4519defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp",
Int16Regs>; 4520defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; 4521defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; 4522 4523defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; 4524defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; 4525defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; 4526 4527defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; 4528defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; 4529defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; 4530 4531defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; 4532defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; 4533defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; 4534 /* Single-component store to a 2D array surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the index $idx, the coordinates $x and $y, and one value register $r. The pattern list is empty. NOTE(review): the assembly string lists $y twice, giving a four-element coordinate list; presumably the PTX a2d form expects four elements and ignores the last one - confirm against the PTX ISA before changing. */ 4535class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4536 : NVPTXInst<(outs), 4537 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4538 intype:$r)), 4539 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4540 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4541multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> { 4542 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4543 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4544} 4545 4546defm SUST_B_2D_ARRAY_B8_CLAMP 4547 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; 4548defm SUST_B_2D_ARRAY_B16_CLAMP 4549 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; 4550defm SUST_B_2D_ARRAY_B32_CLAMP 4551 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>; 4552defm SUST_B_2D_ARRAY_B64_CLAMP 4553 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; 4554 4555defm SUST_B_2D_ARRAY_B8_TRAP 4556 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; 4557defm SUST_B_2D_ARRAY_B16_TRAP 4558 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; 4559defm SUST_B_2D_ARRAY_B32_TRAP 4560 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap",
Int32Regs>; 4561defm SUST_B_2D_ARRAY_B64_TRAP 4562 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; 4563 4564defm SUST_B_2D_ARRAY_B8_ZERO 4565 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; 4566defm SUST_B_2D_ARRAY_B16_ZERO 4567 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; 4568defm SUST_B_2D_ARRAY_B32_ZERO 4569 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; 4570defm SUST_B_2D_ARRAY_B64_ZERO 4571 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; 4572 4573defm SUST_P_2D_ARRAY_B8_TRAP 4574 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; 4575defm SUST_P_2D_ARRAY_B16_TRAP 4576 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; 4577defm SUST_P_2D_ARRAY_B32_TRAP 4578 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; 4579 /* Two-component store to a 2D array surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the index $idx, the coordinates $x and $y, and the value registers $r and $g. The pattern list is empty. NOTE(review): the assembly string lists $y twice, giving a four-element coordinate list; presumably the PTX a2d form expects four elements and ignores the last one - confirm against the PTX ISA before changing. */ 4580class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 4581 : NVPTXInst<(outs), 4582 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4583 intype:$r, intype:$g)), 4584 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", 4585 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4586multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> { 4587 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 4588 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 4589} 4590 4591defm SUST_B_2D_ARRAY_V2B8_CLAMP 4592 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; 4593defm SUST_B_2D_ARRAY_V2B16_CLAMP 4594 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; 4595defm SUST_B_2D_ARRAY_V2B32_CLAMP 4596 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; 4597defm SUST_B_2D_ARRAY_V2B64_CLAMP 4598 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; 4599 4600defm SUST_B_2D_ARRAY_V2B8_TRAP 4601 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>; 4602defm SUST_B_2D_ARRAY_V2B16_TRAP 4603 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>; 4604defm SUST_B_2D_ARRAY_V2B32_TRAP 4605 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>; 4606defm SUST_B_2D_ARRAY_V2B64_TRAP 4607 :
SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>; 4608 4609defm SUST_B_2D_ARRAY_V2B8_ZERO 4610 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>; 4611defm SUST_B_2D_ARRAY_V2B16_ZERO 4612 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>; 4613defm SUST_B_2D_ARRAY_V2B32_ZERO 4614 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>; 4615defm SUST_B_2D_ARRAY_V2B64_ZERO 4616 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>; 4617 4618defm SUST_P_2D_ARRAY_V2B8_TRAP 4619 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>; 4620defm SUST_P_2D_ARRAY_V2B16_TRAP 4621 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>; 4622defm SUST_P_2D_ARRAY_V2B32_TRAP 4623 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>; 4624 /* Four-component store to a 2D array surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the index $idx, the coordinates $x and $y, and the value registers $r, $g, $b, $a. The pattern list is empty. NOTE(review): the assembly string lists $y twice, giving a four-element coordinate list; presumably the PTX a2d form expects four elements and ignores the last one - confirm against the PTX ISA before changing. */ 4625class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 4626 : NVPTXInst<(outs), 4627 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4628 intype:$r, intype:$g, intype:$b, intype:$a)), 4629 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};", 4630 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4631multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> { 4632 def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 4633 def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 4634} 4635 4636defm SUST_B_2D_ARRAY_V4B8_CLAMP 4637 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>; 4638defm SUST_B_2D_ARRAY_V4B16_CLAMP 4639 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>; 4640defm SUST_B_2D_ARRAY_V4B32_CLAMP 4641 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>; 4642 4643defm SUST_B_2D_ARRAY_V4B8_TRAP 4644 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>; 4645defm SUST_B_2D_ARRAY_V4B16_TRAP 4646 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>; 4647defm SUST_B_2D_ARRAY_V4B32_TRAP 4648 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>; 4649 4650defm SUST_B_2D_ARRAY_V4B8_ZERO 4651 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>; 4652defm
SUST_B_2D_ARRAY_V4B16_ZERO 4653 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>; 4654defm SUST_B_2D_ARRAY_V4B32_ZERO 4655 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>; 4656 4657defm SUST_P_2D_ARRAY_V4B8_TRAP 4658 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>; 4659defm SUST_P_2D_ARRAY_V4B16_TRAP 4660 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>; 4661defm SUST_P_2D_ARRAY_V4B32_TRAP 4662 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>; 4663 /* Single-component store to a 3D surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the coordinates $x, $y, $z, and one value register $r. The pattern list is empty. NOTE(review): the assembly string lists $z twice, giving a four-element coordinate list; presumably the PTX 3d form expects four elements and ignores the last one - confirm against the PTX ISA before changing. */ 4664class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf> 4665 : NVPTXInst<(outs), 4666 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4667 intype:$r)), 4668 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4669 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4670multiclass SUST_3D<string inst, NVPTXRegClass intype> { 4671 def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>; 4672 def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>; 4673} 4674 4675defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>; 4676defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>; 4677defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>; 4678defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>; 4679 4680defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>; 4681defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>; 4682defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>; 4683defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>; 4684 4685defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>; 4686defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>; 4687defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>; 4688defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>; 4689 4690defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>; 4691defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>; 4692defm SUST_P_3D_B32_TRAP :
SUST_3D<"sust.p.3d.b32.trap", Int32Regs>; 4693 /* Two-component store to a 3D surface. No outputs; inputs are the surface operand dag `surf` (supplying $s), the coordinates $x, $y, $z, and the value registers $r and $g. The pattern list is empty. NOTE(review): the assembly string lists $z twice, giving a four-element coordinate list; presumably the PTX 3d form expects four elements and ignores the last one - confirm against the PTX ISA before changing. */ 4694class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4695 : NVPTXInst<(outs), 4696 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4697 intype:$r, intype:$g)), 4698 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};", 4699 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate). */ 4700multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> { 4701 def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4702 def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>; 4703} 4704 4705defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>; 4706defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>; 4707defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>; 4708defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>; 4709 4710defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>; 4711defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>; 4712defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>; 4713defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>; 4714 4715defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>; 4716defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>; 4717defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>; 4718defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>; 4719 4720defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>; 4721defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>; 4722defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>; 4723 /* Four-component variant of the 3D surface store: same operand layout, with value registers $r, $g, $b, $a. The coordinate list again repeats $z as its fourth element (NOTE(review): presumably required by the PTX 3d form - confirm). */ 4724class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4725 : NVPTXInst<(outs), 4726 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4727 intype:$r, intype:$g, intype:$b, intype:$a)), 4728 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b,
$a\\};", 4729 []>; /* Emits _R ($s in an Int64Regs register) and _I ($s as an i64imm immediate) for the four-component 3D store. The selection patterns that follow the defms map each sust intrinsic onto the _R record only. */ 4730multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> { 4731 def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4732 def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>; 4733} 4734 4735defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>; 4736defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>; 4737defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>; 4738 4739defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>; 4740defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>; 4741defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>; 4742 4743defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>; 4744defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>; 4745defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>; 4746 4747defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>; 4748defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>; 4749defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; 4750 4751} 4752 4753// Surface store instruction patterns 4754// I'm not sure why we can't just include these in the instruction definitions, 4755// but TableGen complains of type errors :( 4756 4757// .clamp variant 4758def : Pat<(int_nvvm_sust_b_1d_i8_clamp 4759 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4760 (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 4761 4762def : Pat<(int_nvvm_sust_b_1d_i16_clamp 4763 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4764 (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 4765 4766def : Pat<(int_nvvm_sust_b_1d_i32_clamp 4767 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 4768 (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 4769 4770def : Pat<(int_nvvm_sust_b_1d_i64_clamp 4771 Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r), 4772 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 4773 /* Two- and four-component 1D .clamp intrinsics: each maps one-to-one onto the _R (register surface handle) record, forwarding operands in the same order. */ 4774def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 4775 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4776 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4777 Int16Regs:$r, Int16Regs:$g)>; 4778 4779def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 4780 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4781 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4782 Int16Regs:$r, Int16Regs:$g)>; 4783 4784def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 4785 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4786 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4787 Int32Regs:$r, Int32Regs:$g)>; 4788 4789def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 4790 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4791 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4792 Int64Regs:$r, Int64Regs:$g)>; 4793 4794def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 4795 Int64Regs:$s, Int32Regs:$x, 4796 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4797 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4798 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4799 4800def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 4801 Int64Regs:$s, Int32Regs:$x, 4802 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4803 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4804 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4805 4806def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 4807 Int64Regs:$s, Int32Regs:$x, 4808 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4809 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4810 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4811 4812 4813 /* 1D-array .clamp intrinsics. The pattern operand $l is passed positionally into the instruction's first Int32Regs input (declared as $idx in the 1D-array instruction classes). */ 4814def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 4815 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 4816 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4817 Int16Regs:$r)>; 4818 4819def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 4820
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 4821 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4822 Int16Regs:$r)>; 4823 /* Remaining 1D-array .clamp intrinsics (i32, i64, then the v2 and v4 forms): each selects the matching SUST_B_1D_ARRAY_*_CLAMP_R record with operands forwarded in order. */ 4824def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 4825 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 4826 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4827 Int32Regs:$r)>; 4828 4829def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 4830 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 4831 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4832 Int64Regs:$r)>; 4833 4834def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 4835 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4836 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4837 Int16Regs:$r, Int16Regs:$g)>; 4838 4839def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 4840 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4841 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4842 Int16Regs:$r, Int16Regs:$g)>; 4843 4844def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 4845 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4846 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4847 Int32Regs:$r, Int32Regs:$g)>; 4848 4849def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 4850 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4851 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4852 Int64Regs:$r, Int64Regs:$g)>; 4853 4854def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 4855 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4856 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4857 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4858 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4859 4860def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 4861 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4862 Int16Regs:$r,
Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4863 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4864 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4865 4866def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 4867 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4868 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4869 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4870 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4871 4872 4873 /* 2D .clamp intrinsics: operands are the surface handle $s, the coordinates $x and $y, then the value register(s); each selects the matching SUST_B_2D_*_CLAMP_R record. */ 4874def : Pat<(int_nvvm_sust_b_2d_i8_clamp 4875 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4876 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4877 Int16Regs:$r)>; 4878 4879def : Pat<(int_nvvm_sust_b_2d_i16_clamp 4880 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4881 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4882 Int16Regs:$r)>; 4883 4884def : Pat<(int_nvvm_sust_b_2d_i32_clamp 4885 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4886 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4887 Int32Regs:$r)>; 4888 4889def : Pat<(int_nvvm_sust_b_2d_i64_clamp 4890 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 4891 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4892 Int64Regs:$r)>; 4893 4894def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 4895 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 4896 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4897 Int16Regs:$r, Int16Regs:$g)>; 4898 4899def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 4900 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 4901 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4902 Int16Regs:$r, Int16Regs:$g)>; 4903 4904def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 4905 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 4906 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4907 Int32Regs:$r,
Int32Regs:$g)>; 4908 4909def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 4910 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 4911 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4912 Int64Regs:$r, Int64Regs:$g)>; 4913 4914def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 4915 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4916 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4917 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4918 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4919 4920def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 4921 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4922 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4923 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4924 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4925 4926def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 4927 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4928 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4929 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4930 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4931 4932 4933 /* 2D-array .clamp intrinsics. The pattern operand $l is passed positionally into the instruction's first Int32Regs input (declared as $idx in the 2D-array instruction classes), followed by $x, $y and the value register(s). */ 4934def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 4935 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4936 (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s, 4937 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4938 Int16Regs:$r)>; 4939 4940def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 4941 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4942 (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s, 4943 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4944 Int16Regs:$r)>; 4945 4946def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 4947 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4948 (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s, 4949 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4950 Int32Regs:$r)>; 4951 4952def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 4953 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r), 4954 (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s, 4955 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4956 Int64Regs:$r)>; 4957 /* Two- and four-component 2D-array .clamp intrinsics: each selects the matching SUST_B_2D_ARRAY_V*_CLAMP_R record with operands forwarded in order. */ 4958def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 4959 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4960 Int16Regs:$r, Int16Regs:$g), 4961 (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4962 Int32Regs:$x, Int32Regs:$y, 4963 Int16Regs:$r, Int16Regs:$g)>; 4964 4965def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 4966 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4967 Int16Regs:$r, Int16Regs:$g), 4968 (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4969 Int32Regs:$x, Int32Regs:$y, 4970 Int16Regs:$r, Int16Regs:$g)>; 4971 4972def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 4973 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4974 Int32Regs:$g), 4975 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4976 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 4977 4978def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 4979 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 4980 Int64Regs:$g), 4981 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4982 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 4983 4984def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 4985 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4986 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4987 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, 4988 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4989 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4990 4991def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 4992 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4993 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4994 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, 4995 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4996 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4997 4998def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 4999
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5000 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5001 (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5002 Int32Regs:$x, Int32Regs:$y, 5003 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5004 5005 5006 /* 3D .clamp intrinsics: operands are the surface handle $s, the coordinates $x, $y, $z, then the value register(s); each selects the matching SUST_B_3D_*_CLAMP_R record. */ 5007def : Pat<(int_nvvm_sust_b_3d_i8_clamp 5008 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5009 Int16Regs:$r), 5010 (SUST_B_3D_B8_CLAMP_R Int64Regs:$s, 5011 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5012 Int16Regs:$r)>; 5013 5014def : Pat<(int_nvvm_sust_b_3d_i16_clamp 5015 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5016 Int16Regs:$r), 5017 (SUST_B_3D_B16_CLAMP_R Int64Regs:$s, 5018 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5019 Int16Regs:$r)>; 5020 5021def : Pat<(int_nvvm_sust_b_3d_i32_clamp 5022 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5023 Int32Regs:$r), 5024 (SUST_B_3D_B32_CLAMP_R Int64Regs:$s, 5025 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5026 Int32Regs:$r)>; 5027 5028def : Pat<(int_nvvm_sust_b_3d_i64_clamp 5029 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5030 Int64Regs:$r), 5031 (SUST_B_3D_B64_CLAMP_R Int64Regs:$s, 5032 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5033 Int64Regs:$r)>; 5034 5035def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp 5036 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5037 Int16Regs:$r, Int16Regs:$g), 5038 (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s, 5039 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5040 Int16Regs:$r, Int16Regs:$g)>; 5041 5042def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp 5043 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5044 Int16Regs:$r, Int16Regs:$g), 5045 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, 5046 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5047 Int16Regs:$r, Int16Regs:$g)>; 5048 5049def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 5050 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5051 Int32Regs:$r, Int32Regs:$g), 5052 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, 5053 Int32Regs:$x,
Int32Regs:$y, Int32Regs:$z, 5054 Int32Regs:$r, Int32Regs:$g)>; 5055 /* Remaining 3D .clamp intrinsics (v2i64 and the v4 forms), each selecting the matching _R record; the 1D .trap patterns begin after the ".trap variant" marker below. */ 5056def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 5057 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5058 Int64Regs:$r, Int64Regs:$g), 5059 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, 5060 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5061 Int64Regs:$r, Int64Regs:$g)>; 5062 5063def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 5064 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5065 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5066 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, 5067 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5068 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5069 5070def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 5071 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5072 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5073 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, 5074 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5075 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5076 5077def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 5078 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5079 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5080 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, 5081 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5082 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5083 5084 5085// .trap variant 5086def : Pat<(int_nvvm_sust_b_1d_i8_trap 5087 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5088 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5089 5090def : Pat<(int_nvvm_sust_b_1d_i16_trap 5091 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5092 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5093 5094def : Pat<(int_nvvm_sust_b_1d_i32_trap 5095 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5096 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5097 5098def : Pat<(int_nvvm_sust_b_1d_i64_trap 5099 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5100 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r)>; 5101 /* Two- and four-component 1D .trap intrinsics: each maps one-to-one onto the SUST_B_1D_V*_TRAP_R record, forwarding operands in the same order. */ 5102def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 5103 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5104 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5105 Int16Regs:$r, Int16Regs:$g)>; 5106 5107def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 5108 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5109 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5110 Int16Regs:$r, Int16Regs:$g)>; 5111 5112def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 5113 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5114 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5115 Int32Regs:$r, Int32Regs:$g)>; 5116 5117def : Pat<(int_nvvm_sust_b_1d_v2i64_trap 5118 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5119 (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, 5120 Int64Regs:$r, Int64Regs:$g)>; 5121 5122def : Pat<(int_nvvm_sust_b_1d_v4i8_trap 5123 Int64Regs:$s, Int32Regs:$x, 5124 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5125 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5126 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5127 5128def : Pat<(int_nvvm_sust_b_1d_v4i16_trap 5129 Int64Regs:$s, Int32Regs:$x, 5130 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5131 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5132 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5133 5134def : Pat<(int_nvvm_sust_b_1d_v4i32_trap 5135 Int64Regs:$s, Int32Regs:$x, 5136 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5137 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5138 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5139 5140 5141 /* 1D-array .trap intrinsics. The pattern operand $l is passed positionally into the instruction's first Int32Regs input (declared as $idx in the 1D-array instruction classes). */ 5142def : Pat<(int_nvvm_sust_b_1d_array_i8_trap 5143 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5144 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5145 Int16Regs:$r)>; 5146 5147def : Pat<(int_nvvm_sust_b_1d_array_i16_trap 5148 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5149 (SUST_B_1D_ARRAY_B16_TRAP_R
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5150 Int16Regs:$r)>; 5151 /* Remaining 1D-array .trap intrinsics (i32, i64, then the v2 and v4 forms): each selects the matching SUST_B_1D_ARRAY_*_TRAP_R record with operands forwarded in order. */ 5152def : Pat<(int_nvvm_sust_b_1d_array_i32_trap 5153 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5154 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5155 Int32Regs:$r)>; 5156 5157def : Pat<(int_nvvm_sust_b_1d_array_i64_trap 5158 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5159 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5160 Int64Regs:$r)>; 5161 5162def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap 5163 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5164 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5165 Int16Regs:$r, Int16Regs:$g)>; 5166 5167def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 5168 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5169 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5170 Int16Regs:$r, Int16Regs:$g)>; 5171 5172def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 5173 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5174 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5175 Int32Regs:$r, Int32Regs:$g)>; 5176 5177def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 5178 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5179 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5180 Int64Regs:$r, Int64Regs:$g)>; 5181 5182def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 5183 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5184 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5185 (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5186 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5187 5188def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 5189 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5190 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5191 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, 5192 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5193 5194def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 5195 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5196 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5197 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5198 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5199 5200 5201 /* 2D .trap intrinsics: operands are the surface handle $s, the coordinates $x and $y, then the value register(s); each selects the matching SUST_B_2D_*_TRAP_R record. */ 5202def : Pat<(int_nvvm_sust_b_2d_i8_trap 5203 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5204 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5205 Int16Regs:$r)>; 5206 5207def : Pat<(int_nvvm_sust_b_2d_i16_trap 5208 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5209 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5210 Int16Regs:$r)>; 5211 5212def : Pat<(int_nvvm_sust_b_2d_i32_trap 5213 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5214 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5215 Int32Regs:$r)>; 5216 5217def : Pat<(int_nvvm_sust_b_2d_i64_trap 5218 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5219 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5220 Int64Regs:$r)>; 5221 5222def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 5223 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5224 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5225 Int16Regs:$r, Int16Regs:$g)>; 5226 5227def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 5228 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5229 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5230 Int16Regs:$r, Int16Regs:$g)>; 5231 5232def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 5233 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5234 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5235 Int32Regs:$r, Int32Regs:$g)>; 5236 5237def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 5238 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g), 5239 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5240 Int64Regs:$r, Int64Regs:$g)>; 5241 5242def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 5243 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5244 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5245 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5246 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5247 5248def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 5249 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5250 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5251 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5252 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5253 5254def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 5255 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5256 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5257 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5258 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5259 5260 5261 5262def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 5263 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5264 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 5265 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5266 Int16Regs:$r)>; 5267 5268def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 5269 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5270 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 5271 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5272 Int16Regs:$r)>; 5273 5274def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 5275 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5276 (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 5277 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5278 Int32Regs:$r)>; 5279 5280def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 5281 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5282 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, 5283 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5284 Int64Regs:$r)>; 5285 5286def 
: Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 5287 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5288 Int16Regs:$r, Int16Regs:$g), 5289 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 5290 Int32Regs:$x, Int32Regs:$y, 5291 Int16Regs:$r, Int16Regs:$g)>; 5292 5293def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 5294 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5295 Int16Regs:$r, Int16Regs:$g), 5296 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 5297 Int32Regs:$x, Int32Regs:$y, 5298 Int16Regs:$r, Int16Regs:$g)>; 5299 5300def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 5301 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5302 Int32Regs:$g), 5303 (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5304 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5305 5306def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 5307 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5308 Int64Regs:$g), 5309 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, 5310 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5311 5312def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 5313 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5314 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5315 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 5316 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5317 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5318 5319def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 5320 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5321 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5322 (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 5323 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5324 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5325 5326def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 5327 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5328 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5329 (SUST_B_2D_ARRAY_V4B32_TRAP_R 
Int64Regs:$s, Int32Regs:$l, 5330 Int32Regs:$x, Int32Regs:$y, 5331 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5332 5333 5334 5335def : Pat<(int_nvvm_sust_b_3d_i8_trap 5336 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5337 Int16Regs:$r), 5338 (SUST_B_3D_B8_TRAP_R Int64Regs:$s, 5339 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5340 Int16Regs:$r)>; 5341 5342def : Pat<(int_nvvm_sust_b_3d_i16_trap 5343 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5344 Int16Regs:$r), 5345 (SUST_B_3D_B16_TRAP_R Int64Regs:$s, 5346 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5347 Int16Regs:$r)>; 5348 5349def : Pat<(int_nvvm_sust_b_3d_i32_trap 5350 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5351 Int32Regs:$r), 5352 (SUST_B_3D_B32_TRAP_R Int64Regs:$s, 5353 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5354 Int32Regs:$r)>; 5355 5356def : Pat<(int_nvvm_sust_b_3d_i64_trap 5357 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5358 Int64Regs:$r), 5359 (SUST_B_3D_B64_TRAP_R Int64Regs:$s, 5360 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5361 Int64Regs:$r)>; 5362 5363def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 5364 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5365 Int16Regs:$r, Int16Regs:$g), 5366 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, 5367 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5368 Int16Regs:$r, Int16Regs:$g)>; 5369 5370def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 5371 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5372 Int16Regs:$r, Int16Regs:$g), 5373 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, 5374 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5375 Int16Regs:$r, Int16Regs:$g)>; 5376 5377def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 5378 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5379 Int32Regs:$r, Int32Regs:$g), 5380 (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s, 5381 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5382 Int32Regs:$r, Int32Regs:$g)>; 5383 5384def : Pat<(int_nvvm_sust_b_3d_v2i64_trap 5385 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z, 5386 Int64Regs:$r, Int64Regs:$g), 5387 (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s, 5388 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5389 Int64Regs:$r, Int64Regs:$g)>; 5390 5391def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 5392 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5393 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5394 (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s, 5395 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5396 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5397 5398def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 5399 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5400 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5401 (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s, 5402 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5403 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5404 5405def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 5406 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5407 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5408 (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s, 5409 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5410 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5411 5412 5413// .zero variant 5414def : Pat<(int_nvvm_sust_b_1d_i8_zero 5415 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5416 (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5417 5418def : Pat<(int_nvvm_sust_b_1d_i16_zero 5419 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5420 (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5421 5422def : Pat<(int_nvvm_sust_b_1d_i32_zero 5423 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5424 (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5425 5426def : Pat<(int_nvvm_sust_b_1d_i64_zero 5427 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5428 (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 5429 5430def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 5431 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5432 (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, 
Int32Regs:$x, 5433 Int16Regs:$r, Int16Regs:$g)>; 5434 5435def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 5436 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5437 (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5438 Int16Regs:$r, Int16Regs:$g)>; 5439 5440def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 5441 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5442 (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5443 Int32Regs:$r, Int32Regs:$g)>; 5444 5445def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 5446 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5447 (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, 5448 Int64Regs:$r, Int64Regs:$g)>; 5449 5450def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 5451 Int64Regs:$s, Int32Regs:$x, 5452 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5453 (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, 5454 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5455 5456def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 5457 Int64Regs:$s, Int32Regs:$x, 5458 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5459 (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5460 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5461 5462def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 5463 Int64Regs:$s, Int32Regs:$x, 5464 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5465 (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5466 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5467 5468 5469 5470def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 5471 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5472 (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5473 Int16Regs:$r)>; 5474 5475def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 5476 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5477 (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5478 Int16Regs:$r)>; 5479 5480def : Pat<(int_nvvm_sust_b_1d_array_i32_zero 5481 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$r), 5482 (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5483 Int32Regs:$r)>; 5484 5485def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 5486 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5487 (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5488 Int64Regs:$r)>; 5489 5490def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 5491 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5492 (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5493 Int16Regs:$r, Int16Regs:$g)>; 5494 5495def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 5496 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5497 (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5498 Int16Regs:$r, Int16Regs:$g)>; 5499 5500def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 5501 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5502 (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5503 Int32Regs:$r, Int32Regs:$g)>; 5504 5505def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 5506 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5507 (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5508 Int64Regs:$r, Int64Regs:$g)>; 5509 5510def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 5511 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5512 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5513 (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5514 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5515 5516def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 5517 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5518 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5519 (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5520 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5521 5522def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 5523 Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, 5524 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5525 (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5526 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5527 5528 5529 5530def : Pat<(int_nvvm_sust_b_2d_i8_zero 5531 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5532 (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5533 Int16Regs:$r)>; 5534 5535def : Pat<(int_nvvm_sust_b_2d_i16_zero 5536 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5537 (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5538 Int16Regs:$r)>; 5539 5540def : Pat<(int_nvvm_sust_b_2d_i32_zero 5541 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5542 (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5543 Int32Regs:$r)>; 5544 5545def : Pat<(int_nvvm_sust_b_2d_i64_zero 5546 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5547 (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5548 Int64Regs:$r)>; 5549 5550def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 5551 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5552 (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5553 Int16Regs:$r, Int16Regs:$g)>; 5554 5555def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 5556 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5557 (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5558 Int16Regs:$r, Int16Regs:$g)>; 5559 5560def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 5561 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5562 (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5563 Int32Regs:$r, Int32Regs:$g)>; 5564 5565def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 5566 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 5567 (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5568 Int64Regs:$r, Int64Regs:$g)>; 5569 5570def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 
5571 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5572 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5573 (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5574 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5575 5576def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 5577 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5578 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5579 (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5580 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5581 5582def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 5583 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5584 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5585 (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5586 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5587 5588 5589 5590def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 5591 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5592 (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s, 5593 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5594 Int16Regs:$r)>; 5595 5596def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 5597 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5598 (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s, 5599 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5600 Int16Regs:$r)>; 5601 5602def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 5603 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5604 (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s, 5605 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5606 Int32Regs:$r)>; 5607 5608def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 5609 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5610 (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s, 5611 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5612 Int64Regs:$r)>; 5613 5614def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 5615 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5616 Int16Regs:$r, Int16Regs:$g), 5617 (SUST_B_2D_ARRAY_V2B8_ZERO_R 
Int64Regs:$s, Int32Regs:$l, 5618 Int32Regs:$x, Int32Regs:$y, 5619 Int16Regs:$r, Int16Regs:$g)>; 5620 5621def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 5622 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5623 Int16Regs:$r, Int16Regs:$g), 5624 (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, 5625 Int32Regs:$x, Int32Regs:$y, 5626 Int16Regs:$r, Int16Regs:$g)>; 5627 5628def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 5629 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5630 Int32Regs:$g), 5631 (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5632 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5633 5634def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 5635 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5636 Int64Regs:$g), 5637 (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, 5638 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5639 5640def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 5641 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5642 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5643 (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s, 5644 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5645 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5646 5647def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 5648 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5649 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5650 (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s, 5651 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5652 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5653 5654def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 5655 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5656 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5657 (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5658 Int32Regs:$x, Int32Regs:$y, 5659 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5660 5661 5662 5663def : 
Pat<(int_nvvm_sust_b_3d_i8_zero 5664 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5665 Int16Regs:$r), 5666 (SUST_B_3D_B8_ZERO_R Int64Regs:$s, 5667 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5668 Int16Regs:$r)>; 5669 5670def : Pat<(int_nvvm_sust_b_3d_i16_zero 5671 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5672 Int16Regs:$r), 5673 (SUST_B_3D_B16_ZERO_R Int64Regs:$s, 5674 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5675 Int16Regs:$r)>; 5676 5677def : Pat<(int_nvvm_sust_b_3d_i32_zero 5678 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5679 Int32Regs:$r), 5680 (SUST_B_3D_B32_ZERO_R Int64Regs:$s, 5681 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5682 Int32Regs:$r)>; 5683 5684def : Pat<(int_nvvm_sust_b_3d_i64_zero 5685 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5686 Int64Regs:$r), 5687 (SUST_B_3D_B64_ZERO_R Int64Regs:$s, 5688 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5689 Int64Regs:$r)>; 5690 5691def : Pat<(int_nvvm_sust_b_3d_v2i8_zero 5692 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5693 Int16Regs:$r, Int16Regs:$g), 5694 (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s, 5695 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5696 Int16Regs:$r, Int16Regs:$g)>; 5697 5698def : Pat<(int_nvvm_sust_b_3d_v2i16_zero 5699 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5700 Int16Regs:$r, Int16Regs:$g), 5701 (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s, 5702 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5703 Int16Regs:$r, Int16Regs:$g)>; 5704 5705def : Pat<(int_nvvm_sust_b_3d_v2i32_zero 5706 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5707 Int32Regs:$r, Int32Regs:$g), 5708 (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s, 5709 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5710 Int32Regs:$r, Int32Regs:$g)>; 5711 5712def : Pat<(int_nvvm_sust_b_3d_v2i64_zero 5713 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5714 Int64Regs:$r, Int64Regs:$g), 5715 (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s, 5716 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5717 Int64Regs:$r, 
Int64Regs:$g)>; 5718 5719def : Pat<(int_nvvm_sust_b_3d_v4i8_zero 5720 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5721 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5722 (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s, 5723 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5724 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5725 5726def : Pat<(int_nvvm_sust_b_3d_v4i16_zero 5727 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5728 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5729 (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s, 5730 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5731 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5732 5733def : Pat<(int_nvvm_sust_b_3d_v4i32_zero 5734 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5735 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5736 (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s, 5737 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5738 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5739 5740 5741 5742 5743def : Pat<(int_nvvm_sust_p_1d_i8_trap 5744 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5745 (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5746 5747def : Pat<(int_nvvm_sust_p_1d_i16_trap 5748 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5749 (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5750 5751def : Pat<(int_nvvm_sust_p_1d_i32_trap 5752 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5753 (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5754 5755def : Pat<(int_nvvm_sust_p_1d_v2i8_trap 5756 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5757 (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5758 Int16Regs:$r, Int16Regs:$g)>; 5759 5760def : Pat<(int_nvvm_sust_p_1d_v2i16_trap 5761 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5762 (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5763 Int16Regs:$r, Int16Regs:$g)>; 5764 5765def : Pat<(int_nvvm_sust_p_1d_v2i32_trap 5766 Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$r, Int32Regs:$g), 5767 (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5768 Int32Regs:$r, Int32Regs:$g)>; 5769 5770def : Pat<(int_nvvm_sust_p_1d_v4i8_trap 5771 Int64Regs:$s, Int32Regs:$x, 5772 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5773 (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5774 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5775 5776def : Pat<(int_nvvm_sust_p_1d_v4i16_trap 5777 Int64Regs:$s, Int32Regs:$x, 5778 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5779 (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5780 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5781 5782def : Pat<(int_nvvm_sust_p_1d_v4i32_trap 5783 Int64Regs:$s, Int32Regs:$x, 5784 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5785 (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5786 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5787 5788 5789 5790def : Pat<(int_nvvm_sust_p_1d_array_i8_trap 5791 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5792 (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5793 Int16Regs:$r)>; 5794 5795def : Pat<(int_nvvm_sust_p_1d_array_i16_trap 5796 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5797 (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5798 Int16Regs:$r)>; 5799 5800def : Pat<(int_nvvm_sust_p_1d_array_i32_trap 5801 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5802 (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5803 Int32Regs:$r)>; 5804 5805def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap 5806 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5807 (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5808 Int16Regs:$r, Int16Regs:$g)>; 5809 5810def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap 5811 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5812 (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, 5813 Int16Regs:$r, Int16Regs:$g)>; 5814 5815def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap 5816 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5817 (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5818 Int32Regs:$r, Int32Regs:$g)>; 5819 5820def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap 5821 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5822 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5823 (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5824 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5825 5826def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap 5827 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5828 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5829 (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5830 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5831 5832def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap 5833 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5834 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5835 (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5836 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5837 5838 5839 5840def : Pat<(int_nvvm_sust_p_2d_i8_trap 5841 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5842 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5843 Int16Regs:$r)>; 5844 5845def : Pat<(int_nvvm_sust_p_2d_i16_trap 5846 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5847 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5848 Int16Regs:$r)>; 5849 5850def : Pat<(int_nvvm_sust_p_2d_i32_trap 5851 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5852 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5853 Int32Regs:$r)>; 5854 5855def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 5856 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5857 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, 5858 Int16Regs:$r, Int16Regs:$g)>; 5859 5860def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 5861 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5862 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5863 Int16Regs:$r, Int16Regs:$g)>; 5864 5865def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 5866 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5867 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5868 Int32Regs:$r, Int32Regs:$g)>; 5869 5870def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 5871 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5872 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5873 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5874 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5875 5876def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 5877 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5878 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5879 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5880 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5881 5882def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 5883 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5884 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5885 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5886 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5887 5888 5889 5890def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 5891 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5892 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 5893 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5894 Int16Regs:$r)>; 5895 5896def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 5897 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5898 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 5899 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5900 Int16Regs:$r)>; 5901 5902def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 5903 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$r), 5904 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 5905 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5906 Int32Regs:$r)>; 5907 5908def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 5909 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5910 Int16Regs:$r, Int16Regs:$g), 5911 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 5912 Int32Regs:$x, Int32Regs:$y, 5913 Int16Regs:$r, Int16Regs:$g)>; 5914 5915def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 5916 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5917 Int16Regs:$r, Int16Regs:$g), 5918 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 5919 Int32Regs:$x, Int32Regs:$y, 5920 Int16Regs:$r, Int16Regs:$g)>; 5921 5922def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 5923 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5924 Int32Regs:$g), 5925 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5926 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5927 5928def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 5929 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5930 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5931 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 5932 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5933 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5934 5935def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 5936 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5937 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5938 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 5939 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5940 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5941 5942def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 5943 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5944 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5945 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5946 Int32Regs:$x, Int32Regs:$y, 5947 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5948 
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
//
// Each pattern rewrites one intrinsic into the SUST_P_3D_*_TRAP_R instruction
// with the matching element width and vector arity. All operands ($s, then
// $x/$y/$z, then the data operands $r[, $g[, $b, $a]]) are forwarded unchanged
// and in the same order. Note that both the i8 and the i16 variants carry
// their data operands in Int16Regs.
//
// NOTE(review): $s is presumably the surface handle and $x/$y/$z the
// coordinates -- confirm against the SUST_P_3D_* instruction definitions.

// Scalar stores: one data operand.
def : Pat<(int_nvvm_sust_p_3d_i8_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

// Two-element vector stores: data operands $r, $g.
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

// Four-element vector stores: data operands $r, $g, $b, $a.
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
              Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

//-----------------------------------
// Read Special Registers
//-----------------------------------

// Reads the PTX special register %<regname> into a 64-bit register.
//   regname - register name as printed after the '%' in the asm string.
//   intop   - zero-operand intrinsic this instruction is selected for.
// Prints as: mov.u64 \t$d, %<regname>;
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
    : NVPTXInst<(outs Int64Regs:$d), (ins),
                "mov.u64 \t$d, %" # regname # ";",
                [(set Int64Regs:$d, (intop))]>;

// 32-bit counterpart of PTX_READ_SREG_R64.
// Prints as: mov.u32 \t$d, %<regname>;
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
    : NVPTXInst<(outs Int32Regs:$d), (ins),
                "mov.u32 \t$d, %" # regname # ";",
                [(set Int32Regs:$d, (intop))]>;

// TODO: Add vector variants of the special-register reads.

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock is read into a 32-bit register, %clock64 into a 64-bit register.
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
// %pm0 .. %pm3, each read as a 32-bit value.
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
// handle the constant.
// The source operand is the literal WARP_SZ rather than a %-prefixed special
// register name, so the instruction is spelled out by hand.
def INT_PTX_SREG_WARPSIZE :
    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
//   r  - the WMMA_REGS record to wrap; its geom, frag and ptx_elt_type are
//        forwarded to the WMMA_REGS base class.
//   op - name of the instruction family using the fragment. It is only
//        consulted by Predicates below, which compares it against "mma" and
//        "ldmatrix".
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the FIRST arm whose condition is true, so
  // the order of the arms below is significant.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4. This arm must stay ahead of the generic m8n8k4 arm
    // further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 and f32 fragments @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // Every m8n8k4 fragment other than f64 (f64 was matched above).
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // A second "f64 @ m8n8k4 : [hasSM80, hasPTX70]" arm used to sit here. It
    // could never be selected because the identical arm near the top of the
    // list always matches first, so it has been dropped.

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
// The `ins` operator is replaced by Intr, and the operand classes imem,
// MEMri64 and MEMri are replaced by ADDRvar, ADDRri64 and ADDRri respectively.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record, looked up with !cast.
//   _Args - list of (ins ...) dags that are concatenated into Args.
// The instruction is created with empty operand lists, the placeholder asm
// string "?" and no pattern; the classes deriving from it below fill in the
// operand lists and AsmString with `let`.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
//   Frag       - fragment being loaded; supplies the output registers,
//                predicates and the frag/geom/type parts of the asm string.
//   Layout     - layout string spliced into the asm string after a ".".
//   Space      - address-space suffix: ".shared", ".global", or anything else
//                (the instantiations below pass "") for generic.
//   WithStride - when set, an extra Int32Regs:$ldm operand is added and
//                printed.
//   SrcOp      - operand class used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
               [!con((ins SrcOp:$src),
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The trailing MmaCode:$ptx operand is what "${ptx:aligned}" refers to in
  // the asm string; MMA_PAT supplies ptx.version for it.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// Parameters mirror WMMA_LOAD, except that the fragment registers are inputs
// and DstOp is the operand class used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants
// One anonymous instruction is defined for every combination of layout,
// stride, address space and address operand class, for each fragment accepted
// by NVVM_WMMA_LDST_SUPPORTED. The records are collected in MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
// `ret` is FragA's predicate list, extended with [hasSM80, hasPTX71] when
// b1op is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
  );
}

// WMMA.MMA
//   FragA/FragB/FragC - input fragments; FragD - output fragment.
//   ALayout/BLayout   - layout strings printed for A and B.
//   Satfinite         - non-zero appends ".satfinite".
//   rnd               - rounding-mode string; printed as ".<rnd>" unless empty.
//   b1op              - suffix printed right after "wmma.mma" (may be empty).
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // When A is f16 only the D and C types are printed; otherwise all four
  // (D, A, B, C) are.
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    1: "." # FragD.ptx_elt_type
       # "." # FragA.ptx_elt_type
       # "." # FragB.ptx_elt_type
       # "." # FragC.ptx_elt_type,
  );
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate WMMA_MMA for every layout/satfinite/rounding/op/b1op combination
// accepted by NVVM_WMMA_SUPPORTED; the records are collected in WMMAs.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                              WMMA_REGINFO<op[1], "wmma.mma">,
                              WMMA_REGINFO<op[2], "wmma.mma">,
                              WMMA_REGINFO<op[3], "wmma.mma">,
                              layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
// Same parameters as WMMA_MMA minus the rounding mode. The asm string is
// "mma.sync.aligned.<geom>.<alayout>.<blayout>[.satfinite]<types><b1op>"
// followed by the D, A, B and C register lists.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate MMA for every layout/satfinite/op/b1op combination accepted by
// NVVM_MMA_SUPPORTED; the records are collected in MMAs.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.m8n8.<frag>[|.trans][|.shared].b16
//
//   Frag       - fragment being loaded; supplies geom, frag, element type,
//                output registers and predicates.
//   Transposed - when set, ".trans" is added to the asm string.
//   Space      - ".shared" selects the shared address-space matcher; any
//                other value selects the generic one.
//   SrcOp      - operand class used for the $src address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
// The result pattern is (wi <Args operands...>, ptx.version); the last operand
// fills the MmaCode:$ptx input that each instruction class appends to Args.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma>;