//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each one reads the node's APFloat value and
// compares it with a plain C++ `==` against the named constant.
def immFloat0 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==0.0f);
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
    float f = (float)N->getValueAPF().convertToFloat();
    return (f==1.0f);
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==0.0);
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
    double d = (double)N->getValueAPF().convertToDouble();
    return (d==1.0);
}]>;

// Predicate code fragments that check the address space of the memory node N
// via ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2" ]
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n, !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                        [prefix # !sub(n, 1)]),
                            []);
}

// Values iterated for the `threadmask_imm` parameter of SHFL_INSTR. Only the
// sync variants take a thread-mask operand, so they get both values (0 and 1);
// the non-sync variants need a single iteration.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
      [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
      [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;
// The reducing barriers take an i32 input: it is first turned into a predicate
// with setp.ne against 0, and and/or results are converted back with selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
        Requires<[hasPTX60, hasSM30]>;

// barrier.sync with an explicit count; the suffix gives register (R) or
// immediate (I) for the id and cnt operands, in that order.
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;

// One shfl / shfl.sync instruction variant.
//   sync           - emit the shfl.sync form, which takes a leading threadmask.
//   mode           - shuffle mode string, spliced into the mnemonic and the
//                    intrinsic name.
//   reg            - "i32" or "f32"; selects the register class.
//   return_pred    - also produce the Int1Regs $pred result ("p" intrinsics).
//   offset_imm, mask_imm, threadmask_imm
//                  - whether that operand is an immediate (i32imm) or a
//                    register (Int32Regs).
// The operand lists, asm string and selection pattern are all assembled from
// these parameters; the intrinsic is looked up by its constructed name.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Derive the pattern from the operand lists: (outs ...) becomes (set ...),
  // (ins ...) becomes (Intr ...), and every i32imm operand is matched as imm.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}

foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              // shfl.sync was introduced in PTX ISA 6.0, so the sync variants
              // require hasPTX60 like the other *.sync instructions in this
              // file. Non-sync variants keep [hasSM30, hasSHFL].
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync: the four defs cover immediate/register combinations of the
// $mask and $value operands.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync with the additional Int1Regs $pred result, printed as
// "$dest|$pred".
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType> on 32-bit registers.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Async Copy Functions
//-----------------------------------

// In the multiclasses below, the _32 and _64 defs differ only in the register
// class used for the address operand(s).
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;

multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX70, hasSM80]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           !strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           !strconcat("mbarrier.arrive_drop", AddrSpace,
                      ".b64 $state, [$addr];"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           !strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
                      ".b64 $state, [$addr], $count;"),
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                                  int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           !strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// f32: fmin(1.0, fmax(0.0, $a)) in all four operand orders selects to
// CVT_f32_f32 with the CvtSAT mode.
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64: the same four operand orders, selecting to CVT_f64_f64 with CvtSAT.
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
// One-source instruction: $dst (target_regclass) is set from
// IntOP applied to $src0 (src_regclass); OpcStr is the complete asm string.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
  NVPTXRegClass src_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
            OpcStr,
        [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: $dst (t_regclass) is set from IntOP applied to
// $src0 and $src1; OpcStr is the complete asm string.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs t_regclass:$dst),
                        (ins s0_regclass:$src0, s1_regclass:$src1),
            OpcStr,
        [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source instruction: $dst (t_regclass) is set from IntOP applied to
// $src0, $src1 and $src2; OpcStr is the complete asm string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
  NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
  NVPTXRegClass s2_regclass, Intrinsic IntOP>
            : NVPTXInst<(outs t_regclass:$dst),
                        (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
            OpcStr,
        [(set t_regclass:$dst,
          (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

// f32 min/max, with and without .ftz.
def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
  Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

// f64 min/max.
def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
  Float64Regs, Float64Regs, int_nvvm_fmax_d>;


//
// Multiplication
//

// mul.hi on 32-bit registers, s32 and u32 forms.
def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs,
  Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// mul.hi on 64-bit registers, s64 and u64 forms.
def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs,
  Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 mul with an explicit rounding modifier (rn/rz/rm/rp), each with and
// without .ftz.
def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 mul with an explicit rounding modifier.
def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, s32 and u32 forms.
def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

// f32 div.approx, with and without .ftz.
def INT_NVVM_DIV_APPROX_FTZ_F
  : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs,
    Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 div with an explicit rounding modifier, each with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 div with an explicit rounding modifier.
def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor  Ceil
//

// floor selects to a same-type CVT with CvtRMI (CvtRMI_FTZ for the ftz
// intrinsic); ceil uses CvtRPI / CvtRPI_FTZ.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs,
  Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
  Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

// round selects to a same-type CVT with CvtRNI / CvtRNI_FTZ.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

// trunc selects to a same-type CVT with CvtRZI / CvtRZI_FTZ.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

// saturate selects to a same-type CVT with CvtSAT / CvtSAT_FTZ.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2  Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin  Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// f32 fma with an explicit rounding modifier, each with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F
  : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F
  : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F
  : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F
  : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs,
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

// f64 fma with an explicit rounding modifier.
def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F
    : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F
    : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F
    : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F
    : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F
    : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F
    : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F
    : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D
    : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D
    : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D
    : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D
    : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_D
    : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt: rounded (rn/rz/rm/rp) and approx forms; f32 forms also in .ftz.
//

def INT_NVVM_SQRT_RN_FTZ_F
    : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F
    : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F
    : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F
    : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F
    : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F
    : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F
    : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F
    : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F
    : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F
    : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D
    : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D
    : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D
    : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D
    : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic: int_nvvm_sqrt_f has no record of its own. It is mapped
// onto one of the f32 sqrt records above; the first three patterns are guarded
// by the doF32FTZ / do_SQRTF32_RN predicates, the last one is unguarded.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt (approx forms only)
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F
    : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D
    : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add: one record per rounding modifier; f32 forms also in .ftz.
//

def INT_NVVM_ADD_RN_FTZ_F
    : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F
    : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F
    : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F
    : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F
    : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F
    : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F
    : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F
    : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

def INT_NVVM_ADD_RN_D
    : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D
    : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D
    : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D
    : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
               Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert: every conversion intrinsic is selected as the CVT_<dst>_<src>
// record with a mode operand taken from the intrinsic's rounding suffix.
//

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
// u32 -> f64, round-towards-positive variant.
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32: integer-rounding CVT modes, with _FTZ flavours for the *_ftz
// intrinsics.
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Pack two 32-bit registers into one 64-bit float register with a vector
// mov.b64. The doubled braces are needed so that a literal {...} is printed.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpack one 32-bit half of a 64-bit float register: mov.b64 splits $src0
// into two 32-bit destinations, one of which is a scratch %temp register
// declared inside the emitted { } scope.
def INT_NVVM_D2I_LO : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{$dst, %temp}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{%temp, $dst}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16: the CVT_f16_f32 result is passed through BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast: plain mov.b32 / mov.b64 between the integer and float register
// classes of the same width.
//

def INT_NVVM_BITCAST_F2I
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F
    : F_MATH_1<"mov.b32 \t$dst, $src0;",
               Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL
    : F_MATH_1<"mov.b64 \t$dst, $src0;",
               Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Common shape of every fns.b32 record. `ins` is the input operand list and
// `Operands` the int_nvvm_fns dag to match; both must name their operands
// $mask, $base and $offset because the asm string refers to them.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
               "fns.b32 \t$dst, $mask, $base, $offset;",
               [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX60, hasSM30]>;

// One record per register/immediate combination. The three-letter suffix
// gives the kind of (mask, base, offset): r = Int32Regs, i = i32imm.
def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,
                                imm:$offset)>;
def INT_FNS_rir
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base,
                                Int32Regs:$offset)>;
def INT_FNS_rii
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base,
                                Int32Regs:$offset)>;
def INT_FNS_iri
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic (address + value) for a single pointer register class.
// Emits "atom<SpaceStr><OpcStr><TypeStr>" and defines:
//   reg - value operand in a register of `regclass`
//   imm - value operand as an `IMMType` immediate matched through `IMM`
// Both records are guarded by `Pred`.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  // Same asm string as `reg`; the former trailing "" operand of !strconcat
  // contributed nothing and has been dropped.
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice, with Int32Regs (p32) and Int64Regs (p64)
// as the address register class. `Pred` defaults to no predicates.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted sequence declares a scratch register `temp`, negates $b into it
// with neg.s<TypeStr>, then issues the atomic on `temp` with a .u<TypeStr>
// type. Here TypeStr is only the bit width ("32"/"64"), without a leading dot.
// Register form only; there is no immediate variant.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat(
      "{{ \n\t",
      ".reg \t.s", TypeStr, " temp; \n\t",
      "neg.s", TypeStr, " \ttemp, $b; \n\t",
      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
      "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
}
// p32 / p64 address-width instantiations of F_ATOMIC_2_NEG_imp.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, Pred>;
  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, Pred>;
}

// has 3 operands
// Address plus two value operands ($b, $c). Defines one record per
// register/immediate mix of the value operands:
//   reg  - $b reg, $c reg        imm1 - $b imm, $c reg
//   imm2 - $b reg, $c imm        imm3 - $b imm, $c imm
// All four share the same asm string and are guarded by `Pred`.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
  Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
  Requires<Pred>;

  // The former trailing "" operand of !strconcat contributed nothing and has
  // been dropped, matching the sibling records.
  def imm2 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
  Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
  Requires<Pred>;
}
// p32 / p64 address-width instantiations of F_ATOMIC_3_imp.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-qualified fragments: _g = global, _s = shared, _gen = generic.
def atomic_load_add_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_add_64 node:$a, node:$b)>;
// Unsized name, but this one wraps the floating-point atomic_load_fadd.
def atomic_load_add_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_fadd node:$a, node:$b)>;
// Shared and generic address-space fragments for the floating-point
// atomic_load_fadd.
def atomic_load_add_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_fadd node:$a, node:$b)>;

// Naming used by the atomic instruction records: _G_ = ".global",
// _S_ = ".shared", _GEN_ = no state-space string. The *_GEN_*_USE_G records
// match the generic fragment but print ".global".

// 32-bit integer add
defm INT_PTX_ATOM_ADD_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
                 atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
                 atomic_load_add_32_gen, i32imm, imm>;

// 64-bit integer add
defm INT_PTX_ATOM_ADD_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
                 atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
                 atomic_load_add_64_gen, i64imm, imm>;

// f32 add
defm INT_PTX_ATOM_ADD_G_F32
    : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
                 atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32
    : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
                 atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32
    : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
                 atomic_load_add_gen, f32imm, fpimm>;

// f64 add: guarded by the hasAtomAddF64 predicate.
defm INT_PTX_ATOM_ADD_G_F64
    : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
                 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64
    : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
                 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64
    : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
                 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

def atomic_load_sub_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_sub_64 node:$a, node:$b)>;

// Subtraction goes through F_ATOMIC_2_NEG with the ".add" opcode string, so
// the operand is negated before an atomic add. TypeStr is the bare bit width.
defm INT_PTX_ATOM_SUB_G_32
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32
    : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
                     atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G
    : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
                     atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32
    : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
                     atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64
    : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
                     atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64
    : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
                     atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G
    : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
                     atomic_load_sub_64_gen>;

// atom_swap

def atomic_swap_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_swap_64 node:$a, node:$b)>;

// Swap is printed with the ".exch" opcode string on .b32 / .b64.
defm INT_PTX_ATOM_SWAP_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
                 atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
                 atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
                 atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
                 atomic_swap_64_gen, i64imm, imm>;

// atom_max

def atomic_load_max_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_max_64 node:$a, node:$b)>;
// Remaining atom_max fragments: signed 64-bit generic, then the unsigned
// (umax) family for each width and address space.
def atomic_load_max_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umax_64 node:$a, node:$b)>;

// Signed max is printed with .s32/.s64, unsigned max with .u32/.u64.
defm INT_PTX_ATOM_LOAD_MAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".max",
                 atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max",
                 atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".max",
                 atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".max",
                 atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".max",
                 atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max",
                 atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".max",
                 atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".max",
                 atomic_load_umax_64_gen, i64imm, imm>;

// atom_min

def atomic_load_min_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (atomic_load_umin_64 node:$a, node:$b)>;

// Signed min is printed with .s32/.s64, unsigned min with .u32/.u64.
defm INT_PTX_ATOM_LOAD_MIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".s32", ".min",
                 atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min",
                 atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".s64", ".min",
                 atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".s64", ".min",
                 atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".min",
                 atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min",
                 atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64
    : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".min",
                 atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64
    : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G
    : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".min",
                 atomic_load_umin_64_gen, i64imm, imm>;

// atom_inc  atom_dec

// Unlike the fragments above, these wrap NVVM intrinsics
// (int_nvvm_atomic_load_inc_32 / int_nvvm_atomic_load_dec_32).
def atomic_load_inc_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g
    : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s
    : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
                        (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen
    : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
                         (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32
    : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
                 atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32
    : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
                 atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32
    : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
                 atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs,
".shared", ".u32", ".dec", 1553 atomic_load_dec_32_s, i32imm, imm>; 1554defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", 1555 atomic_load_dec_32_gen, i32imm, imm>; 1556defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1557 ".dec", atomic_load_dec_32_gen, i32imm, imm>; 1558 1559// atom_and 1560 1561def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1562 (atomic_load_and_32 node:$a, node:$b)>; 1563def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1564 (atomic_load_and_32 node:$a, node:$b)>; 1565def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1566 (atomic_load_and_32 node:$a, node:$b)>; 1567def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1568 (atomic_load_and_64 node:$a, node:$b)>; 1569def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1570 (atomic_load_and_64 node:$a, node:$b)>; 1571def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1572 (atomic_load_and_64 node:$a, node:$b)>; 1573 1574defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", 1575 atomic_load_and_32_g, i32imm, imm>; 1576defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", 1577 atomic_load_and_32_s, i32imm, imm>; 1578defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", 1579 atomic_load_and_32_gen, i32imm, imm>; 1580defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1581 ".and", atomic_load_and_32_gen, i32imm, imm>; 1582defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", 1583 atomic_load_and_64_g, i64imm, imm>; 1584defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", 1585 atomic_load_and_64_s, i64imm, imm>; 1586defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", 1587 atomic_load_and_64_gen, i64imm, imm>; 1588defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 
".b64", 1589 ".and", atomic_load_and_64_gen, i64imm, imm>; 1590 1591// atom_or 1592 1593def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1594 (atomic_load_or_32 node:$a, node:$b)>; 1595def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1596 (atomic_load_or_32 node:$a, node:$b)>; 1597def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1598 (atomic_load_or_32 node:$a, node:$b)>; 1599def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1600 (atomic_load_or_64 node:$a, node:$b)>; 1601def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1602 (atomic_load_or_64 node:$a, node:$b)>; 1603def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1604 (atomic_load_or_64 node:$a, node:$b)>; 1605 1606defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", 1607 atomic_load_or_32_g, i32imm, imm>; 1608defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or", 1609 atomic_load_or_32_gen, i32imm, imm>; 1610defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1611 ".or", atomic_load_or_32_gen, i32imm, imm>; 1612defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", 1613 atomic_load_or_32_s, i32imm, imm>; 1614defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", 1615 atomic_load_or_64_g, i64imm, imm>; 1616defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", 1617 atomic_load_or_64_gen, i64imm, imm>; 1618defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1619 ".or", atomic_load_or_64_gen, i64imm, imm>; 1620defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", 1621 atomic_load_or_64_s, i64imm, imm>; 1622 1623// atom_xor 1624 1625def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1626 (atomic_load_xor_32 node:$a, node:$b)>; 1627def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1628 (atomic_load_xor_32 
node:$a, node:$b)>; 1629def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1630 (atomic_load_xor_32 node:$a, node:$b)>; 1631def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1632 (atomic_load_xor_64 node:$a, node:$b)>; 1633def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1634 (atomic_load_xor_64 node:$a, node:$b)>; 1635def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1636 (atomic_load_xor_64 node:$a, node:$b)>; 1637 1638defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", 1639 atomic_load_xor_32_g, i32imm, imm>; 1640defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", 1641 atomic_load_xor_32_s, i32imm, imm>; 1642defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", 1643 atomic_load_xor_32_gen, i32imm, imm>; 1644defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1645 ".xor", atomic_load_xor_32_gen, i32imm, imm>; 1646defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", 1647 atomic_load_xor_64_g, i64imm, imm>; 1648defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", 1649 atomic_load_xor_64_s, i64imm, imm>; 1650defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", 1651 atomic_load_xor_64_gen, i64imm, imm>; 1652defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1653 ".xor", atomic_load_xor_64_gen, i64imm, imm>; 1654 1655// atom_cas 1656 1657def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1658 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1659def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1660 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1661def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1662 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1663def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, 
node:$b, node:$c), 1664 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1665def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1666 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1667def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1668 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1669 1670defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1671 atomic_cmp_swap_32_g, i32imm>; 1672defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1673 atomic_cmp_swap_32_s, i32imm>; 1674defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1675 atomic_cmp_swap_32_gen, i32imm>; 1676defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1677 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1678defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1679 atomic_cmp_swap_64_g, i64imm>; 1680defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1681 atomic_cmp_swap_64_s, i64imm>; 1682defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1683 atomic_cmp_swap_64_gen, i64imm>; 1684defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1685 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1686 1687// Support for scoped atomic operations. Matches 1688// int_nvvm_atomic_{op}_{space}_{type}_{scope} 1689// and converts it into the appropriate instruction. 1690// NOTE: not all possible combinations are implemented 1691// 'space' is limited to generic as it's the only one needed to support CUDA. 1692// 'scope' = 'gpu' is default and is handled by regular atomic instructions. 1693class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds, 1694 dag ins, dag Operands> 1695 : NVPTXInst<(outs regclass:$result), ins, 1696 AsmStr, 1697 [(set regclass:$result, Operands)]>, 1698 Requires<Preds>; 1699 1700// Define instruction variants for all addressing modes. 
// Two-operand scoped atomic: emits anonymous ATOM23_impl instructions for
// every combination of {32-bit, 64-bit} address register and
// {register, immediate} value operand $b.
//   AsmStr   - complete assembly string for the instruction.
//   Intr     - intrinsic being matched.
//   regclass - register class of the result and of register operands.
//   ImmType / Imm / ImmTy - operand, node and value type used for the
//              immediate forms.
//   Preds    - predicates required by every variant.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register forms carry AddedComplexity = 1; the immediate forms below keep
  // the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand scoped atomic: same parameters as ATOM2P_impl, with an
// additional value operand $c.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c.
  // AddedComplexity is 2 for reg/reg, 1 when exactly one of $b/$c is an
  // immediate, and left at the default for imm/imm.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
//   asm:       atom[.<space>][.<scope>].<op>.<type> \t$result, [$src], $b;
//   intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
// The space qualifier is omitted from the asm string for SpaceStr == "gen".
// The scope qualifier is omitted from the asm string for ScopeStr == "gpu"
// and for an empty ScopeStr; the latter keeps the asm string well-formed
// (no "atom..op") and agrees with the intrinsic-name side, which already
// drops the "_<scope>" suffix when ScopeStr is empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!empty(ScopeStr), "",
                                  !if(!eq(ScopeStr, "gpu"), "",
                                      "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                          "int_nvvm_atomic_" # OpStr
                          # "_" # SpaceStr # "_" # IntTypeStr
                          # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand counterpart of ATOM2N_impl; the asm string additionally
// carries the $c operand.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            # !if(!empty(ScopeStr), "",
                                  !if(!eq(ScopeStr, "gpu"), "",
                                      "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                          "int_nvvm_atomic_" # OpStr
                          # "_" # SpaceStr # "_" # IntTypeStr
                          # !if(!empty(ScopeStr), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
// Two-operand form: forwards everything to ATOM2N_impl with the address
// space fixed to "gen".
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}

// Three-operand form: forwards everything to ATOM3N_impl with the address
// space fixed to "gen".
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  defm _gen_
    : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                  regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Both scoped variants append hasAtomScope to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}

multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics.  They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
  defm _sys
    : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                  regclass, ImmType, Imm, ImmTy,
                  !listconcat(Preds, [hasAtomScope])>;
}

// Per-operation type tables.  Each entry supplies, in order: the intrinsic
// type letter ("i"/"f"), the PTX type suffix, the register class, the
// immediate operand/node/value type, and any extra predicates.

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
  defm _f32
    : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64
    : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
                  [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
                  [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32
    : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32
    : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
  defm _s64
    : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
  defm _u64
    : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
                  [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32
    : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32
    : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
  defm _b64
    : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
}

// Instantiate the scoped atomics, one defm per operation.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One ldu.global instruction per address form.  TyStr supplies the type
// suffix together with the operand list; all variants have an empty pattern
// list and share the hasLDU predicate.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  let Predicates = [hasLDU] in {
    def areg
      : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                  "ldu.global." # TyStr, []>;
    def areg64
      : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                  "ldu.global." # TyStr, []>;
    def avar
      : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                  "ldu.global." # TyStr, []>;
    def ari
      : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                  "ldu.global." # TyStr, []>;
    def ari64
      : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                  "ldu.global." # TyStr, []>;
  }
}

// Note: the i8 variant produces its result in Int16Regs.
defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
// Remaining scalar ldu.global instantiations (LDU_G is defined earlier in
// this file).
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
// Two-result ldu.global, one def per address form.  All variants have an
// empty pattern list.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

// Four-result counterpart of VLDU_G_ELE_V2.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ldu.global." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ldu.global." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ldu.global." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ldu.global." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ldu.global." # TyStr, []>;
}

// v2 instantiations.  Note: the i8 variants use Int16Regs for their results.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// v4 instantiations.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// One ld.global.nc instruction per address form.  TyStr supplies the type
// suffix together with the operand list; all variants have an empty pattern
// list and share the hasLDG predicate.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  let Predicates = [hasLDG] in {
    def areg
      : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                  "ld.global.nc." # TyStr, []>;
    def areg64
      : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                  "ld.global.nc." # TyStr, []>;
    def avar
      : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                  "ld.global.nc." # TyStr, []>;
    def ari
      : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                  "ld.global.nc." # TyStr, []>;
    def ari64
      : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                  "ld.global.nc." # TyStr, []>;
  }
}

// Scalar instantiations.  Note: the i8 variant uses Int16Regs for its result.
defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16
  : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2
  : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
// Two-result ld.global.nc, one def per address form.  All variants have an
// empty pattern list.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

// Four-result counterpart of VLDG_G_ELE_V2.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int32Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _areg64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins Int64Regs:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari32
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri:$src),
                "ld.global.nc." # TyStr, []>;
  def _ari64
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins MEMri64:$src),
                "ld.global.nc." # TyStr, []>;
  def _avar
    : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                      regclass:$dst3, regclass:$dst4),
                (ins imemAny:$src),
                "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Vector ld.global.nc instantiations (VLDG_G_ELE_V2/V4 are defined earlier in
// this file).
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// Address-space conversion via cvta.<Str>, matched from the intrinsic Intrin.
//   _yes      - 32-bit source and result.
//   _yes_64   - 64-bit source and result.
//   _yes_6432 - 32-bit source, 64-bit result: the source is first widened
//               with cvt.u64.u32 into a temporary; requires useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvt.u64.u32 \t%tmp, $src;\n\t"
                # " cvta." # Str # ".u64 \t$result, %tmp; }}",
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// Address-space conversion via cvta.to.<Str>, matched from the intrinsic
// Intrin.
//   _yes      - 32-bit source and result.
//   _yes_64   - 64-bit source and result.
//   _yes_3264 - 64-bit source, 32-bit result: converts into a 64-bit
//               temporary, then narrows with cvt.u32.u64; requires
//               useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                "{{ .reg .b64 %tmp;\n\t"
                # " cvta.to." # Str # ".u64 \t%tmp, $src;\n\t"
                # " cvt.u32.u64 \t$result, %tmp; }}",
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Emitted as a plain register move.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsic
// One mov per value type; the pointer variants use mov.u32 / mov.u64.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this.  There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen
                   (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// Moves an imem operand into a 64-bit register; has no selection pattern.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each of these emits only an assembly comment.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep

def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins
Int32Regs:$a), 2265 "isspacep.global \t$d, $a;", 2266 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2267def ISSPACEP_GLOBAL_64 2268 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2269 "isspacep.global \t$d, $a;", 2270 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2271def ISSPACEP_LOCAL_32 2272 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2273 "isspacep.local \t$d, $a;", 2274 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2275def ISSPACEP_LOCAL_64 2276 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2277 "isspacep.local \t$d, $a;", 2278 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2279def ISSPACEP_SHARED_32 2280 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2281 "isspacep.shared \t$d, $a;", 2282 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2283def ISSPACEP_SHARED_64 2284 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2285 "isspacep.shared \t$d, $a;", 2286 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2287 2288 2289// Special register reads 2290def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2291 (ins SpecialRegs:$r), 2292 "mov.b32 \t$d, $r;", []>; 2293 2294def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2295def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2296def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2297def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2298def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2299def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2300def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2301def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2302def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2303def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2304def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 2305def 
: Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2306def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2307def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2308def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2309def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2310def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2311def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2312def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2313def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2314def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2315def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2316def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2317def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2318def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2319def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2320def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2321def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2322def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2323def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2324def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2325def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2326 2327 2328// rotate builtin support 2329 2330def ROTATE_B32_HW_IMM 2331 : NVPTXInst<(outs Int32Regs:$dst), 2332 (ins Int32Regs:$src, i32imm:$amt), 2333 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2334 [(set Int32Regs:$dst, 2335 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2336 Requires<[hasHWROT32]> ; 2337 2338def ROTATE_B32_HW_REG 2339 : NVPTXInst<(outs Int32Regs:$dst), 2340 (ins Int32Regs:$src, Int32Regs:$amt), 2341 "shf.l.wrap.b32 
\t$dst, $src, $src, $amt;", 2342 [(set Int32Regs:$dst, 2343 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2344 Requires<[hasHWROT32]> ; 2345 2346def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2347 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2348 Requires<[noHWROT32]> ; 2349 2350def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2351 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2352 Requires<[noHWROT32]> ; 2353 2354let hasSideEffects = false in { 2355 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2356 !strconcat("{{\n\t", 2357 ".reg .b32 %dummy;\n\t", 2358 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2359 "}}"), 2360 []> ; 2361 2362 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2363 !strconcat("{{\n\t", 2364 ".reg .b32 %dummy;\n\t", 2365 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2366 "}}"), 2367 []> ; 2368} 2369 2370let hasSideEffects = false in { 2371 def PACK_TWO_INT32 2372 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2373 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2374} 2375 2376def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2377 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2378 (GET_LO_INT64 Int64Regs:$src))> ; 2379 2380// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2381// no side effects. 
// 32-bit funnel shifts used as building blocks for the 64-bit rotate patterns
// below. They have no selection patterns of their own and are only available
// under hasHWROT32. Operand order in the asm string is $lo, $hi, $amt.
let hasSideEffects = false in {
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;
}

// HW version of rotate 64
// The 64-bit value is split into halves, each result half is produced by one
// 32-bit funnel shift across the two halves, and the halves are re-packed.
// NOTE(review): shf.*.wrap presumably masks $amt to 5 bits, in which case
// these patterns only rotate correctly for amt < 32 -- confirm against the
// PTX ISA before relying on larger amounts.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
// ROT64imm_sw (defined elsewhere) takes the source plus two immediates; the
// rotate-left pattern passes (amt, complement) and the rotate-right pattern
// passes (complement, amt). For a 64-bit rotate the complement is 64 - amt,
// so both immediate patterns must use SUB_FRM_64. The rotate-left pattern
// previously used SUB_FRM_32, which yields 32 - amt.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
// also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles

// Base record for a 1D fetch: four outputs of class outtype ($r, $g, $b, $a),
// the texture/sampler operands supplied by the caller as the dag texsamp
// (named $t and $s in the asm string), and one coordinate $x of class intype.
// No selection pattern is attached.
class TEX_1D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                 !con(texsamp, (ins intype:$x)),
                 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
                 []>;

// Instantiates TEX_1D_base for every register/immediate combination of the
// texture ($t) and sampler ($s) operands: _RR, _RI, _IR, _II.
multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_1D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II :
TEX_1D_base<inst, outtype, intype, 2484 (ins i64imm:$t, i64imm:$s)>; 2485} 2486 2487defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 2488defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2489defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 2490defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2491defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 2492defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2493 2494class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 2495 NVPTXRegClass intype, dag texsamp> 2496 : NVPTXInst<(outs outtype:$r, outtype:$g, 2497 outtype:$b, outtype:$a), 2498 !con(texsamp, (ins intype:$x, intype:$lod)), 2499 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", 2500 []>; 2501 2502multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, 2503 NVPTXRegClass intype> { 2504 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, 2505 (ins Int64Regs:$t, Int64Regs:$s)>; 2506 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, 2507 (ins Int64Regs:$t, i64imm:$s)>; 2508 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, 2509 (ins i64imm:$t, Int64Regs:$s)>; 2510 def _II : TEX_1D_LEVEL_base<inst, outtype, intype, 2511 (ins i64imm:$t, i64imm:$s)>; 2512} 2513 2514defm TEX_1D_F32_F32_LEVEL : 2515 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2516defm TEX_1D_S32_F32_LEVEL : 2517 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2518defm TEX_1D_U32_F32_LEVEL : 2519 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2520 2521class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, 2522 NVPTXRegClass intype, dag texsamp> 2523 : NVPTXInst<(outs outtype:$r, outtype:$g, 2524 outtype:$b, outtype:$a), 2525 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), 2526 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," 2527 " \\{$gradx\\}, 
\\{$grady\\};", 2528 []>; 2529 2530multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, 2531 NVPTXRegClass intype> { 2532 def _RR : TEX_1D_GRAD_base<inst, outtype, intype, 2533 (ins Int64Regs:$t, Int64Regs:$s)>; 2534 def _RI : TEX_1D_GRAD_base<inst, outtype, intype, 2535 (ins Int64Regs:$t, i64imm:$s)>; 2536 def _IR : TEX_1D_GRAD_base<inst, outtype, intype, 2537 (ins i64imm:$t, Int64Regs:$s)>; 2538 def _II : TEX_1D_GRAD_base<inst, outtype, intype, 2539 (ins i64imm:$t, i64imm:$s)>; 2540} 2541 2542defm TEX_1D_F32_F32_GRAD 2543 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2544defm TEX_1D_S32_F32_GRAD 2545 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2546defm TEX_1D_U32_F32_GRAD 2547 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2548 2549class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 2550 NVPTXRegClass intype, dag texsamp> 2551 : NVPTXInst<(outs outtype:$r, outtype:$g, 2552 outtype:$b, outtype:$a), 2553 !con(texsamp, (ins Int32Regs:$l, intype:$x)), 2554 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", 2555 []>; 2556 2557multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, 2558 NVPTXRegClass intype> { 2559 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, 2560 (ins Int64Regs:$t, Int64Regs:$s)>; 2561 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, 2562 (ins Int64Regs:$t, i64imm:$s)>; 2563 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, 2564 (ins i64imm:$t, Int64Regs:$s)>; 2565 def _II : TEX_1D_ARRAY_base<inst, outtype, intype, 2566 (ins i64imm:$t, i64imm:$s)>; 2567} 2568 2569defm TEX_1D_ARRAY_F32_F32 2570 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2571defm TEX_1D_ARRAY_F32_S32 2572 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 2573defm TEX_1D_ARRAY_S32_S32 2574 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 2575defm TEX_1D_ARRAY_S32_F32 2576 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2577defm 
TEX_1D_ARRAY_U32_S32 2578 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 2579defm TEX_1D_ARRAY_U32_F32 2580 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2581 2582class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2583 NVPTXRegClass intype, dag texsamp> 2584 : NVPTXInst<(outs outtype:$r, outtype:$g, 2585 outtype:$b, outtype:$a), 2586 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), 2587 inst # " \t\\{$r, $g, $b, $a\\}," 2588 " [$t, $s, \\{$l, $x\\}], $lod;", 2589 []>; 2590 2591multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2592 NVPTXRegClass intype> { 2593 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2594 (ins Int64Regs:$t, Int64Regs:$s)>; 2595 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2596 (ins Int64Regs:$t, i64imm:$s)>; 2597 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2598 (ins i64imm:$t, Int64Regs:$s)>; 2599 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2600 (ins i64imm:$t, i64imm:$s)>; 2601} 2602 2603defm TEX_1D_ARRAY_F32_F32_LEVEL 2604 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2605defm TEX_1D_ARRAY_S32_F32_LEVEL 2606 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2607defm TEX_1D_ARRAY_U32_F32_LEVEL 2608 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2609 2610class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 2611 NVPTXRegClass intype, dag texsamp> 2612 : NVPTXInst<(outs outtype:$r, outtype:$g, 2613 outtype:$b, outtype:$a), 2614 !con(texsamp, (ins Int32Regs:$l, intype:$x, 2615 intype:$gradx, intype:$grady)), 2616 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," 2617 " \\{$gradx\\}, \\{$grady\\};", 2618 []>; 2619 2620multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 2621 NVPTXRegClass intype> { 2622 def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2623 (ins Int64Regs:$t, Int64Regs:$s)>; 2624 def _RI : 
TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2625 (ins Int64Regs:$t, i64imm:$s)>; 2626 def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2627 (ins i64imm:$t, Int64Regs:$s)>; 2628 def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2629 (ins i64imm:$t, i64imm:$s)>; 2630} 2631 2632defm TEX_1D_ARRAY_F32_F32_GRAD 2633 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2634defm TEX_1D_ARRAY_S32_F32_GRAD 2635 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2636defm TEX_1D_ARRAY_U32_F32_GRAD 2637 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2638 2639class TEX_2D_base<string inst, NVPTXRegClass outtype, 2640 NVPTXRegClass intype, dag texsamp> 2641 : NVPTXInst<(outs outtype:$r, outtype:$g, 2642 outtype:$b, outtype:$a), 2643 !con(texsamp, (ins intype:$x, intype:$y)), 2644 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", 2645 []>; 2646 2647multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2648 def _RR : TEX_2D_base<inst, outtype, intype, 2649 (ins Int64Regs:$t, Int64Regs:$s)>; 2650 def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; 2651 def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>; 2652 def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>; 2653} 2654 2655defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; 2656defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; 2657defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; 2658defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; 2659defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; 2660defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; 2661 2662class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype, 2663 NVPTXRegClass intype, dag texsamp> 2664 : NVPTXInst<(outs outtype:$r, outtype:$g, 2665 
outtype:$b, outtype:$a), 2666 !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)), 2667 inst # " \t\\{$r, $g, $b, $a\\}," 2668 " [$t, $s, \\{$x, $y\\}], $lod;", 2669 []>; 2670 2671multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype, 2672 NVPTXRegClass intype> { 2673 def _RR : TEX_2D_LEVEL_base<inst, outtype, intype, 2674 (ins Int64Regs:$t, Int64Regs:$s)>; 2675 def _RI : TEX_2D_LEVEL_base<inst, outtype, intype, 2676 (ins Int64Regs:$t, i64imm:$s)>; 2677 def _IR : TEX_2D_LEVEL_base<inst, outtype, intype, 2678 (ins i64imm:$t, Int64Regs:$s)>; 2679 def _II : TEX_2D_LEVEL_base<inst, outtype, intype, 2680 (ins i64imm:$t, i64imm:$s)>; 2681} 2682 2683defm TEX_2D_F32_F32_LEVEL : 2684 TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>; 2685defm TEX_2D_S32_F32_LEVEL : 2686 TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>; 2687defm TEX_2D_U32_F32_LEVEL : 2688 TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>; 2689 2690class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype, 2691 NVPTXRegClass intype, dag texsamp> 2692 : NVPTXInst<(outs outtype:$r, outtype:$g, 2693 outtype:$b, outtype:$a), 2694 !con(texsamp, (ins intype:$x, intype:$y, 2695 intype:$gradx0, intype:$gradx1, 2696 intype:$grady0, intype:$grady1)), 2697 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}]," 2698 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 2699 []>; 2700 2701multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype, 2702 NVPTXRegClass intype> { 2703 def _RR : TEX_2D_GRAD_base<inst, outtype, intype, 2704 (ins Int64Regs:$t, Int64Regs:$s)>; 2705 def _RI : TEX_2D_GRAD_base<inst, outtype, intype, 2706 (ins Int64Regs:$t, i64imm:$s)>; 2707 def _IR : TEX_2D_GRAD_base<inst, outtype, intype, 2708 (ins i64imm:$t, Int64Regs:$s)>; 2709 def _II : TEX_2D_GRAD_base<inst, outtype, intype, 2710 (ins i64imm:$t, i64imm:$s)>; 2711} 2712 2713defm TEX_2D_F32_F32_GRAD : 2714 TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>; 2715defm 
TEX_2D_S32_F32_GRAD : 2716 TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>; 2717defm TEX_2D_U32_F32_GRAD : 2718 TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>; 2719 2720class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype, 2721 NVPTXRegClass intype, dag texsamp> 2722 : NVPTXInst<(outs outtype:$r, outtype:$g, 2723 outtype:$b, outtype:$a), 2724 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)), 2725 inst # " \t\\{$r, $g, $b, $a\\}," 2726 " [$t, $s, \\{$l, $x, $y, $y\\}];", 2727 []>; 2728 2729multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype, 2730 NVPTXRegClass intype> { 2731 def _RR : TEX_2D_ARRAY_base<inst, outtype, intype, 2732 (ins Int64Regs:$t, Int64Regs:$s)>; 2733 def _RI : TEX_2D_ARRAY_base<inst, outtype, intype, 2734 (ins Int64Regs:$t, i64imm:$s)>; 2735 def _IR : TEX_2D_ARRAY_base<inst, outtype, intype, 2736 (ins i64imm:$t, Int64Regs:$s)>; 2737 def _II : TEX_2D_ARRAY_base<inst, outtype, intype, 2738 (ins i64imm:$t, i64imm:$s)>; 2739} 2740 2741defm TEX_2D_ARRAY_F32_F32 2742 : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 2743defm TEX_2D_ARRAY_F32_S32 2744 : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>; 2745defm TEX_2D_ARRAY_S32_S32 2746 : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>; 2747defm TEX_2D_ARRAY_S32_F32 2748 : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 2749defm TEX_2D_ARRAY_U32_S32 2750 : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>; 2751defm TEX_2D_ARRAY_U32_F32 2752 : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 2753 2754class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2755 NVPTXRegClass intype, dag texsamp> 2756 : NVPTXInst<(outs outtype:$r, outtype:$g, 2757 outtype:$b, outtype:$a), 2758 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 2759 intype:$lod)), 2760 inst # " \t\\{$r, $g, $b, $a\\}," 2761 " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2762 []>; 2763 2764multiclass 
TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2765 NVPTXRegClass intype> { 2766 def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2767 (ins Int64Regs:$t, Int64Regs:$s)>; 2768 def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2769 (ins Int64Regs:$t, i64imm:$s)>; 2770 def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2771 (ins i64imm:$t, Int64Regs:$s)>; 2772 def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype, 2773 (ins i64imm:$t, i64imm:$s)>; 2774} 2775 2776defm TEX_2D_ARRAY_F32_F32_LEVEL 2777 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 2778defm TEX_2D_ARRAY_S32_F32_LEVEL 2779 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 2780defm TEX_2D_ARRAY_U32_F32_LEVEL 2781 : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 2782 2783class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 2784 NVPTXRegClass intype, dag texsamp> 2785 : NVPTXInst<(outs outtype:$r, outtype:$g, 2786 outtype:$b, outtype:$a), 2787 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, 2788 intype:$gradx0, intype:$gradx1, 2789 intype:$grady0, intype:$grady1)), 2790 inst # " \t\\{$r, $g, $b, $a\\}," 2791 " [$t, $s, \\{$l, $x, $y, $y\\}]," 2792 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 2793 []>; 2794 2795multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 2796 NVPTXRegClass intype> { 2797 def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 2798 (ins Int64Regs:$t, Int64Regs:$s)>; 2799 def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 2800 (ins Int64Regs:$t, i64imm:$s)>; 2801 def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 2802 (ins i64imm:$t, Int64Regs:$s)>; 2803 def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype, 2804 (ins i64imm:$t, i64imm:$s)>; 2805} 2806 2807defm TEX_2D_ARRAY_F32_F32_GRAD 2808 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>; 2809defm TEX_2D_ARRAY_S32_F32_GRAD 2810 : 
TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>; 2811defm TEX_2D_ARRAY_U32_F32_GRAD 2812 : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>; 2813 2814class TEX_3D_base<string inst, NVPTXRegClass outtype, 2815 NVPTXRegClass intype, dag texsamp> 2816 : NVPTXInst<(outs outtype:$r, outtype:$g, 2817 outtype:$b, outtype:$a), 2818 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), 2819 inst # " \t\\{$r, $g, $b, $a\\}," 2820 " [$t, $s, \\{$x, $y, $z, $z\\}];", 2821 []>; 2822 2823multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2824 def _RR : TEX_3D_base<inst, outtype, intype, 2825 (ins Int64Regs:$t, Int64Regs:$s)>; 2826 def _RI : TEX_3D_base<inst, outtype, intype, 2827 (ins Int64Regs:$t, i64imm:$s)>; 2828 def _IR : TEX_3D_base<inst, outtype, intype, 2829 (ins i64imm:$t, Int64Regs:$s)>; 2830 def _II : TEX_3D_base<inst, outtype, intype, 2831 (ins i64imm:$t, i64imm:$s)>; 2832} 2833 2834defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>; 2835defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>; 2836defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>; 2837defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>; 2838defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>; 2839defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>; 2840 2841class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype, 2842 NVPTXRegClass intype, dag texsamp> 2843 : NVPTXInst<(outs outtype:$r, outtype:$g, 2844 outtype:$b, outtype:$a), 2845 !con(texsamp, (ins intype:$x, intype:$y, intype:$z, 2846 intype:$lod)), 2847 inst # " \t\\{$r, $g, $b, $a\\}," 2848 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2849 []>; 2850 2851multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype, 2852 NVPTXRegClass intype> { 2853 def _RR : TEX_3D_LEVEL_base<inst, outtype, intype, 2854 (ins Int64Regs:$t, Int64Regs:$s)>; 2855 def _RI : 
TEX_3D_LEVEL_base<inst, outtype, intype, 2856 (ins Int64Regs:$t, i64imm:$s)>; 2857 def _IR : TEX_3D_LEVEL_base<inst, outtype, intype, 2858 (ins i64imm:$t, Int64Regs:$s)>; 2859 def _II : TEX_3D_LEVEL_base<inst, outtype, intype, 2860 (ins i64imm:$t, i64imm:$s)>; 2861} 2862 2863defm TEX_3D_F32_F32_LEVEL 2864 : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>; 2865defm TEX_3D_S32_F32_LEVEL 2866 : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>; 2867defm TEX_3D_U32_F32_LEVEL 2868 : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>; 2869 2870class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype, 2871 NVPTXRegClass intype, dag texsamp> 2872 : NVPTXInst<(outs outtype:$r, outtype:$g, 2873 outtype:$b, outtype:$a), 2874 !con(texsamp, (ins intype:$x, intype:$y, intype:$z, 2875 intype :$gradx0, intype:$gradx1, 2876 intype:$gradx2, intype:$grady0, 2877 intype:$grady1, intype:$grady2)), 2878 inst # " \t\\{$r, $g, $b, $a\\}," 2879 " [$t, $s, \\{$x, $y, $z, $z\\}]," 2880 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 2881 " \\{$grady0, $grady1, $grady2, $grady2\\};", 2882 []>; 2883 2884multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype, 2885 NVPTXRegClass intype> { 2886 def _RR : TEX_3D_GRAD_base<inst, outtype, intype, 2887 (ins Int64Regs:$t, Int64Regs:$s)>; 2888 def _RI : TEX_3D_GRAD_base<inst, outtype, intype, 2889 (ins Int64Regs:$t, i64imm:$s)>; 2890 def _IR : TEX_3D_GRAD_base<inst, outtype, intype, 2891 (ins i64imm:$t, Int64Regs:$s)>; 2892 def _II : TEX_3D_GRAD_base<inst, outtype, intype, 2893 (ins i64imm:$t, i64imm:$s)>; 2894} 2895 2896defm TEX_3D_F32_F32_GRAD 2897 : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>; 2898defm TEX_3D_S32_F32_GRAD 2899 : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>; 2900defm TEX_3D_U32_F32_GRAD 2901 : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>; 2902 2903class TEX_CUBE_base<string inst, NVPTXRegClass outtype, 2904 NVPTXRegClass intype, dag 
texsamp> 2905 : NVPTXInst<(outs outtype:$r, outtype:$g, 2906 outtype:$b, outtype:$a), 2907 !con(texsamp, (ins intype:$x, intype:$y, intype:$z)), 2908 inst # " \t\\{$r, $g, $b, $a\\}," 2909 " [$t, $s, \\{$x, $y, $z, $z\\}];", 2910 []>; 2911 2912multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2913 def _RR : TEX_CUBE_base<inst, outtype, intype, 2914 (ins Int64Regs:$t, Int64Regs:$s)>; 2915 def _RI : TEX_CUBE_base<inst, outtype, intype, 2916 (ins Int64Regs:$t, i64imm:$s)>; 2917 def _IR : TEX_CUBE_base<inst, outtype, intype, 2918 (ins i64imm:$t, Int64Regs:$s)>; 2919 def _II : TEX_CUBE_base<inst, outtype, intype, 2920 (ins i64imm:$t, i64imm:$s)>; 2921} 2922 2923defm TEX_CUBE_F32_F32 2924 : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>; 2925defm TEX_CUBE_S32_F32 2926 : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>; 2927defm TEX_CUBE_U32_F32 2928 : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; 2929 2930class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 2931 NVPTXRegClass intype, dag texsamp> 2932 : NVPTXInst<(outs outtype:$r, outtype:$g, 2933 outtype:$b, outtype:$a), 2934 !con(texsamp, (ins intype:$x, intype:$y, intype:$z, 2935 intype:$lod)), 2936 inst # " \t\\{$r, $g, $b, $a\\}," 2937 " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2938 []>; 2939 2940multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 2941 NVPTXRegClass intype> { 2942 def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype, 2943 (ins Int64Regs:$t, Int64Regs:$s)>; 2944 def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype, 2945 (ins Int64Regs:$t, i64imm:$s)>; 2946 def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype, 2947 (ins i64imm:$t, Int64Regs:$s)>; 2948 def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype, 2949 (ins i64imm:$t, i64imm:$s)>; 2950} 2951 2952defm TEX_CUBE_F32_F32_LEVEL 2953 : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>; 2954defm TEX_CUBE_S32_F32_LEVEL 2955 : 
TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the tex.acube instructions: an Int32Regs $l followed by
// three `intype` coordinates; the four of them form the printed coordinate
// list {$l, $x, $y, $z}.  Empty pattern list.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp,
                   (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, $s, \\{$l, $x, $y, $z\\}];",
              []>;

// Register/immediate combinations of $t and $s for TEX_CUBE_ARRAY_base.
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR
    : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}

// tex.acube instantiations: Float32Regs inputs, one defm per result type.
defm TEX_CUBE_ARRAY_F32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// As TEX_CUBE_ARRAY_base, with an extra `intype` $lod operand printed after
// the address bracket.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp,
                   (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                        intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
              []>;

// Register/immediate combinations of $t and $s for TEX_CUBE_ARRAY_LEVEL_base.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR
    : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                (ins
Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

// tex.level.acube instantiations.
defm TEX_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                         Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                         Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                         Int32Regs, Float32Regs>;

// Common shape of the tld4.*.2d instructions: four `outtype` results named
// $v0..$v3 and two `intype` coordinates appended to the operands carried by
// `texsamp`.  Empty pattern list.
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype, dag texsamp>
  : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
              !con(texsamp, (ins intype:$x, intype:$y)),
              inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
              []>;

// Expands to _RR/_RI/_IR/_II: each combination of register (Int64Regs) and
// immediate (i64imm) for $t and $s, in that order.
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR
    : TLD4_2D_base<inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI
    : TLD4_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR
    : TLD4_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II
    : TLD4_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

// tld4.{r,g,b,a}.2d with Float32Regs results.
defm TLD4_R_2D_F32_F32
  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32
  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32
  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32
  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// tld4.{r,g,b,a}.2d, s32 mnemonics with Int32Regs results.
defm TLD4_R_2D_S32_F32
  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32
  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32
  :
TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32
  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// tld4.{r,g,b,a}.2d, u32 mnemonics with Int32Regs results.
defm TLD4_R_2D_U32_F32
  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32
  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32
  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32
  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

}


// texmode_unified
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// Common shape of the unified tex.1d instructions: four `outtype` results and
// one `intype` coordinate appended to the operand carried by `tex`; only $t
// appears inside the address bracket.  Empty pattern list.
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.1d instantiations; the second register class is the coordinate class.
defm TEX_UNIFIED_1D_F32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.level.1d instantiations.
defm TEX_UNIFIED_1D_F32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.grad.1d instructions: one coordinate plus
// single-component $gradx and $grady, each printed in its own brace list.
// Empty pattern list.
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.grad.1d instantiations.
defm TEX_UNIFIED_1D_F32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Unified tex.a1d base: an Int32Regs $l precedes the single coordinate.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins Int32Regs:$l, intype:$x)),
              inst # " \t\\{$r, $g, $b, 
$a\\}, [$t, \\{$l, $x\\}];",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.a1d instantiations; the second register class is the coordinate class.
defm TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.level.a1d instructions: Int32Regs $l, one
// `intype` coordinate, and an `intype` $lod printed after the address
// bracket.  Empty pattern list.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
}

// tex.level.a1d instantiations.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  :
TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Common shape of the unified tex.grad.a1d instructions: Int32Regs $l, one
// coordinate, and single-component $gradx / $grady.  Empty pattern list.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins Int32Regs:$l, intype:$x, intype:$gradx, intype:$grady)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

// tex.grad.a1d instantiations.
defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Common shape of the unified tex.2d instructions: two `intype` coordinates
// appended to the operand carried by `tex`.  Empty pattern list.
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.2d instantiations; the second register class is the coordinate class.
defm TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32
  :
TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.level.2d instructions: two coordinates and
// an `intype` $lod printed after the address bracket.  Empty pattern list.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
              []>;

// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.level.2d instantiations.
defm TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.grad.2d instructions: two coordinates and
// two two-component gradients ($gradx0/1, $grady0/1).  Empty pattern list.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins intype:$x, intype:$y,
                        intype:$gradx0, intype:$gradx1,
                        intype:$grady0, intype:$grady1)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
                     " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
(ins i64imm:$t)>;
}

// tex.grad.2d instantiations.
defm TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.a2d instructions: Int32Regs $l and two
// `intype` coordinates.  The asm string prints $y twice so that the
// coordinate list has four entries.  Empty pattern list.
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.a2d instantiations; the second register class is the coordinate class.
defm TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Unified tex.level.a2d base: Int32Regs $l, two coordinates and an `intype`
// $lod operand.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins Int32Regs:$l, intype:$x, intype:$y, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$l, $x, $y, 
$y\\}], $lod;",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t)>;
}

// tex.level.a2d instantiations.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Common shape of the unified tex.grad.a2d instructions: Int32Regs $l, two
// coordinates (with $y printed twice) and two two-component gradients.
// Empty pattern list.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins Int32Regs:$l, intype:$x, intype:$y,
                        intype:$gradx0, intype:$gradx1,
                        intype:$grady0, intype:$grady1)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
                     " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                     (ins i64imm:$t)>;
}

// tex.grad.a2d instantiations.
defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass
intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.3d instantiations; the second register class is the coordinate class.
defm TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.level.3d instructions: three coordinates
// (with $z printed twice) and an `intype` $lod printed after the address
// bracket.  Empty pattern list.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.level.3d instantiations.
defm TEX_UNIFIED_3D_F32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
  :
TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.grad.3d instructions: three coordinates
// and two three-component gradients.  The asm string repeats the last element
// of every triple ($z, $gradx2, $grady2) so that each brace list has four
// entries.  Empty pattern list.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins intype:$x, intype:$y, intype:$z,
                        intype:$gradx0, intype:$gradx1, intype:$gradx2,
                        intype:$grady0, intype:$grady1, intype:$grady2)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
                     " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                     " \\{$grady0, $grady1, $grady2, $grady2\\};",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.grad.3d instantiations.
defm TEX_UNIFIED_3D_F32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.cube instructions: three coordinates with
// $z printed twice.  Empty pattern list.
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.cube instantiations.
defm TEX_UNIFIED_CUBE_F32_F32
  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32
  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32",
Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32
  : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Common shape of the unified tex.level.cube instructions: three coordinates
// (with $z printed twice) and an `intype` $lod printed after the address
// bracket.  Empty pattern list.
class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$x, $y, $z, $z\\}], $lod;",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.level.cube instantiations.
defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
                           Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
                           Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
                           Int32Regs, Float32Regs>;

// Common shape of the unified tex.acube instructions: Int32Regs $l followed
// by three `intype` coordinates, printed as {$l, $x, $y, $z}.  Empty pattern
// list.
class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                                  NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// tex.acube instantiations.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
  :
TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
  : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// As the unified tex.acube shape, with an extra `intype` $lod operand printed
// after the address bracket.  Empty pattern list.
class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(tex,
                   (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                        intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                     " [$t, \\{$l, $x, $y, $z\\}], $lod;",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                        NVPTXRegClass intype> {
  def _R
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins Int64Regs:$t)>;
  def _I
    : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                        (ins i64imm:$t)>;
}

// tex.level.acube instantiations.
defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                                 Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                                 Int32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                                 Int32Regs, Float32Regs>;

// Common shape of the unified tld4.*.2d instructions: four `outtype` results
// named $v0..$v3 and two `intype` coordinates appended to the operand carried
// by `tex`.  Empty pattern list.
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex>
  : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
              !con(tex, (ins intype:$x, intype:$y)),
              inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
              []>;
// _R takes $t in an Int64Regs register, _I takes it as an i64imm.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype> {
  def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TLD4_UNIFIED_R_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm
TLD4_UNIFIED_G_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

// tld4.{r,g,b,a}.2d, s32 mnemonics with Int32Regs results.
defm TLD4_UNIFIED_R_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

// tld4.{r,g,b,a}.2d, u32 mnemonics with Int32Regs results.
defm TLD4_UNIFIED_R_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_G_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_B_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_UNIFIED_A_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

}



//=== Surface load instructions

// Every record defined inside this scope has IsSuld set to true.
let IsSuld = true in {

// Common shape of the single-result suld.b.1d instructions: one `outtype`
// result and one Int32Regs coordinate appended to the operand carried by
// `surf`.  Empty pattern list.
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$x)),
              inst # " \\{$r\\}, [$s, \\{$x\\}];",
              []>;
// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants; the b8 and b16 forms both produce an Int16Regs result.
defm SULD_1D_I8_CLAMP  : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP :
SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_I8_ZERO  : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// Common shape of the single-result suld.b.a1d instructions: Int32Regs $l and
// $x appended to the operand carried by `surf`.  Empty pattern list.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
              inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
              []>;
// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants; the b8 and b16 forms both produce an Int16Regs result.
defm SULD_1D_ARRAY_I8_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_1D_ARRAY_I8_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_1D_ARRAY_I8_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
  :
SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// Common shape of the single-result suld.b.2d instructions: Int32Regs $x and
// $y appended to the operand carried by `surf`.  Empty pattern list.
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
              inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
              []>;
// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants; the b8 and b16 forms both produce an Int16Regs result.
defm SULD_2D_I8_CLAMP  : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_I8_TRAP  : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_I8_ZERO  : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// Common shape of the single-result suld.b.a2d instructions: Int32Regs $l, $x
// and $y.  The asm string prints $y twice so that the coordinate list has
// four entries.  Empty pattern list.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
              inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
              []>;
// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants; the b8 and b16 forms both produce an Int16Regs result.
defm SULD_2D_ARRAY_I8_CLAMP  : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_I32_CLAMP :
SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_2D_ARRAY_I8_TRAP  : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_2D_ARRAY_I8_ZERO  : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;

// Common shape of the single-result suld.b.3d instructions: Int32Regs $x, $y
// and $z.  The asm string prints $z twice so that the coordinate list has
// four entries.  Empty pattern list.
class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
              inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
              []>;
// _R takes $s in an Int64Regs register, _I takes it as an i64imm.
multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp variants; the b8 and b16 forms both produce an Int16Regs result.
defm SULD_3D_I8_CLAMP  : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

// .trap variants.
defm SULD_3D_I8_TRAP  : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

// .zero variants.
defm SULD_3D_I8_ZERO  : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO :
SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; 3726defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; 3727} 3728 3729let IsSuld = 2 in { 3730 3731class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3732 : NVPTXInst<(outs outtype:$r, outtype:$g), 3733 !con(surf, (ins Int32Regs:$x)), 3734 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", 3735 []>; 3736multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> { 3737 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3738 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>; 3739} 3740 3741defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; 3742defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; 3743defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; 3744defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; 3745 3746defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; 3747defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; 3748defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; 3749defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; 3750 3751defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; 3752defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; 3753defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; 3754defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; 3755 3756class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3757 : NVPTXInst<(outs outtype:$r, outtype:$g), 3758 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), 3759 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3760 []>; 3761multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 3762 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3763 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 3764} 3765 3766defm 
SULD_1D_ARRAY_V2I8_CLAMP 3767 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>; 3768defm SULD_1D_ARRAY_V2I16_CLAMP 3769 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>; 3770defm SULD_1D_ARRAY_V2I32_CLAMP 3771 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>; 3772defm SULD_1D_ARRAY_V2I64_CLAMP 3773 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>; 3774 3775defm SULD_1D_ARRAY_V2I8_TRAP 3776 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>; 3777defm SULD_1D_ARRAY_V2I16_TRAP 3778 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>; 3779defm SULD_1D_ARRAY_V2I32_TRAP 3780 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>; 3781defm SULD_1D_ARRAY_V2I64_TRAP 3782 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>; 3783 3784defm SULD_1D_ARRAY_V2I8_ZERO 3785 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>; 3786defm SULD_1D_ARRAY_V2I16_ZERO 3787 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>; 3788defm SULD_1D_ARRAY_V2I32_ZERO 3789 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>; 3790defm SULD_1D_ARRAY_V2I64_ZERO 3791 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; 3792 3793class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3794 : NVPTXInst<(outs outtype:$r, outtype:$g), 3795 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), 3796 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3797 []>; 3798multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> { 3799 def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3800 def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>; 3801} 3802 3803defm SULD_2D_V2I8_CLAMP 3804 : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>; 3805defm SULD_2D_V2I16_CLAMP 3806 : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>; 3807defm SULD_2D_V2I32_CLAMP 3808 : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>; 3809defm SULD_2D_V2I64_CLAMP 3810 : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>; 3811 3812defm SULD_2D_V2I8_TRAP 3813 : 
SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; 3814defm SULD_2D_V2I16_TRAP 3815 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; 3816defm SULD_2D_V2I32_TRAP 3817 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; 3818defm SULD_2D_V2I64_TRAP 3819 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; 3820 3821defm SULD_2D_V2I8_ZERO 3822 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; 3823defm SULD_2D_V2I16_ZERO 3824 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; 3825defm SULD_2D_V2I32_ZERO 3826 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; 3827defm SULD_2D_V2I64_ZERO 3828 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; 3829 3830class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3831 : NVPTXInst<(outs outtype:$r, outtype:$g), 3832 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), 3833 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", 3834 []>; 3835multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 3836 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3837 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 3838} 3839 3840defm SULD_2D_ARRAY_V2I8_CLAMP 3841 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; 3842defm SULD_2D_ARRAY_V2I16_CLAMP 3843 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; 3844defm SULD_2D_ARRAY_V2I32_CLAMP 3845 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; 3846defm SULD_2D_ARRAY_V2I64_CLAMP 3847 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; 3848 3849defm SULD_2D_ARRAY_V2I8_TRAP 3850 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; 3851defm SULD_2D_ARRAY_V2I16_TRAP 3852 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; 3853defm SULD_2D_ARRAY_V2I32_TRAP 3854 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; 3855defm SULD_2D_ARRAY_V2I64_TRAP 3856 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; 3857 3858defm SULD_2D_ARRAY_V2I8_ZERO 3859 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; 
// Remaining ".zero" instantiations of the two-result 2D-array surface loads.
defm SULD_2D_ARRAY_V2I16_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// Two-result ($r, $g) surface load addressed by $x, $y, $z.
// `surf` supplies the leading $s operand; the remaining inputs are appended
// with !con.  No selection pattern is attached here.
// The asm string prints $z twice, giving a four-element address vector.
// NOTE(review): presumably PTX wants a four-element vector for 3d accesses --
// confirm against the PTX ISA before touching the string.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
          !strconcat(inst, " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];"),
          []>;

// Emits two records per instantiation:
//   <name>_R -- $s is an Int64Regs operand
//   <name>_I -- $s is an i64imm operand
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp
defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

// .trap
defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

// .zero
defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

} // closes the `let IsSuld = 2 in {` scope opened earlier in the file

// Everything from here to the matching brace is defined with IsSuld = 3;
// the scope is closed further down the file.
let IsSuld = 3 in {

// Four-result ($r, $g, $b, $a) surface load addressed by a single $x.
// `surf` supplies the leading $s operand.  No selection pattern is attached.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];"),
          []>;
// Emits two records per instantiation of the four-result 1D load:
//   <name>_R -- $s is an Int64Regs operand
//   <name>_I -- $s is an i64imm operand
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp  (only b8/b16/b32 are instantiated for the V4 forms)
defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

// .trap
defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

// .zero
defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// Four-result ($r, $g, $b, $a) surface load addressed by $l and $x, printed
// in that order inside the address vector.  `surf` supplies the leading $s
// operand.  No selection pattern is attached.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp
defm SULD_1D_ARRAY_V4I8_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

// .trap
defm SULD_1D_ARRAY_V4I8_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

// .zero instantiations of the four-result 1D-array surface loads.
defm SULD_1D_ARRAY_V4I8_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// Four-result ($r, $g, $b, $a) surface load addressed by $x, $y.
// `surf` supplies the leading $s operand.  No selection pattern is attached.
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst, " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp
defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

// .trap
defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

// .zero
defm SULD_2D_V4I8_ZERO  : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// Four-result surface load addressed by $l, $x, $y.
// The asm string prints $y twice, giving a four-element address vector.
// NOTE(review): presumably PTX wants a four-element vector for a2d accesses --
// confirm against the PTX ISA before touching the string.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
          !strconcat(inst,
                     " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp
defm SULD_2D_ARRAY_V4I8_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

// .trap
defm SULD_2D_ARRAY_V4I8_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

// .zero
defm SULD_2D_ARRAY_V4I8_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// Four-result surface load addressed by $x, $y, $z.
// The asm string prints $z twice, giving a four-element address vector
// (same review note as for the 2D-array form applies).
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<
          (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
          !strconcat(inst,
                     " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

// .clamp
defm SULD_3D_V4I8_CLAMP  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

// .trap
defm SULD_3D_V4I8_TRAP  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

// .zero (the b32 variant follows immediately after this span)
defm SULD_3D_V4I8_ZERO  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

} // closes the `let IsSuld = 3 in {` scope opened earlier in the file

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query is defined twice: *_R takes $a as Int64Regs, *_I takes $a as
// i64imm.  Both forms print the same asm string and write an Int32Regs
// result $d.  None of the instructions carries an inline pattern.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_order.b32 \t$d, [$a];", []>;

def TXQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_data_type.b32 \t$d, [$a];", []>;

def TXQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.width.b32 \t$d, [$a];", []>;

def TXQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.height.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.height.b32 \t$d, [$a];", []>;

def TXQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.depth.b32 \t$d, [$a];", []>;

def TXQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.array_size.b32 \t$d, [$a];", []>;

def TXQ_NUM_SAMPLES_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_samples.b32 \t$d, [$a];", []>;

def TXQ_NUM_MIPMAP_LEVELS_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
}

// Map every txq intrinsic onto the register-operand (_R) instruction.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Same layout as the texture queries: *_R takes $a as Int64Regs, *_I takes
// $a as i64imm, and the result $d is an Int32Regs value.
let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_order.b32 \t$d, [$a];", []>;

def SUQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_data_type.b32 \t$d, [$a];", []>;

def SUQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.width.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.width.b32 \t$d, [$a];", []>;

def SUQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.height.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.height.b32 \t$d, [$a];", []>;

def SUQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.depth.b32 \t$d, [$a];", []>;

def SUQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.array_size.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.array_size.b32 \t$d, [$a];", []>;
}

// Map every suq intrinsic onto the register-operand (_R) instruction.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
          (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Each instruction yields an Int1Regs result from an Int64Regs operand and is
// selected directly from the matching int_nvvm_istypep_* intrinsic.
def ISTYPEP_SAMPLER
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.samplerref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.surfref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.texref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_texture Int64Regs:$a))]>;

//===- Surface Stores -----------------------------------------------------===//

// The IsSust scope opened here stays open past the definitions below.
let IsSust = true in {

// Single-value surface store: no results; inputs are $s (supplied through
// `surf`), $x and the stored value $r.  No selection pattern is attached.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, intype:$r)),
          !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r\\};"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_B8_ZERO  : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

// sust.p, .trap only (b8/b16/b32)
defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;

// Two-value surface store: stores $r, $g at $x.
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp
defm SUST_B_1D_V2B8_CLAMP  : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

// sust.b, .trap
defm SUST_B_1D_V2B8_TRAP  : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

// sust.b, .zero
defm SUST_B_1D_V2B8_ZERO  : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

// sust.p, .trap only (b8/b16/b32)
defm SUST_P_1D_V2B8_TRAP  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;

// Four-value surface store: stores $r, $g, $b, $a at $x.
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst, " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
          []>;

// _R takes $s as Int64Regs, _I takes $s as i64imm.
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}

// sust.b, .clamp (the b32 variant follows immediately after this span)
defm SUST_B_1D_V4B8_CLAMP  : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4275defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 4276 4277defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 4278defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 4279defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 4280 4281defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 4282defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 4283defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 4284 4285defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 4286defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; 4287defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 4288 4289class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4290 : NVPTXInst<(outs), 4291 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 4292 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4293 []>; 4294multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 4295 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4296 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4297} 4298 4299defm SUST_B_1D_ARRAY_B8_CLAMP 4300 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; 4301defm SUST_B_1D_ARRAY_B16_CLAMP 4302 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; 4303defm SUST_B_1D_ARRAY_B32_CLAMP 4304 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; 4305defm SUST_B_1D_ARRAY_B64_CLAMP 4306 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; 4307 4308defm SUST_B_1D_ARRAY_B8_TRAP 4309 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; 4310defm SUST_B_1D_ARRAY_B16_TRAP 4311 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; 4312defm SUST_B_1D_ARRAY_B32_TRAP 4313 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; 4314defm SUST_B_1D_ARRAY_B64_TRAP 4315 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; 4316 4317defm 
SUST_B_1D_ARRAY_B8_ZERO 4318 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; 4319defm SUST_B_1D_ARRAY_B16_ZERO 4320 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; 4321defm SUST_B_1D_ARRAY_B32_ZERO 4322 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; 4323defm SUST_B_1D_ARRAY_B64_ZERO 4324 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; 4325 4326defm SUST_P_1D_ARRAY_B8_TRAP 4327 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; 4328defm SUST_P_1D_ARRAY_B16_TRAP 4329 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; 4330defm SUST_P_1D_ARRAY_B32_TRAP 4331 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; 4332 4333class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 4334 : NVPTXInst<(outs), 4335 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 4336 intype:$r, intype:$g)), 4337 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4338 []>; 4339multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { 4340 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 4341 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 4342} 4343 4344defm SUST_B_1D_ARRAY_V2B8_CLAMP 4345 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; 4346defm SUST_B_1D_ARRAY_V2B16_CLAMP 4347 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; 4348defm SUST_B_1D_ARRAY_V2B32_CLAMP 4349 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; 4350defm SUST_B_1D_ARRAY_V2B64_CLAMP 4351 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; 4352 4353defm SUST_B_1D_ARRAY_V2B8_TRAP 4354 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; 4355defm SUST_B_1D_ARRAY_V2B16_TRAP 4356 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; 4357defm SUST_B_1D_ARRAY_V2B32_TRAP 4358 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; 4359defm SUST_B_1D_ARRAY_V2B64_TRAP 4360 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; 4361 4362defm SUST_B_1D_ARRAY_V2B8_ZERO 4363 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; 4364defm 
SUST_B_1D_ARRAY_V2B16_ZERO 4365 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; 4366defm SUST_B_1D_ARRAY_V2B32_ZERO 4367 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; 4368defm SUST_B_1D_ARRAY_V2B64_ZERO 4369 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; 4370 4371defm SUST_P_1D_ARRAY_V2B8_TRAP 4372 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; 4373defm SUST_P_1D_ARRAY_V2B16_TRAP 4374 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; 4375defm SUST_P_1D_ARRAY_V2B32_TRAP 4376 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; 4377 4378class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 4379 : NVPTXInst<(outs), 4380 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 4381 intype:$r, intype:$g, intype:$b, intype:$a)), 4382 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", 4383 []>; 4384multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> { 4385 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 4386 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 4387} 4388 4389defm SUST_B_1D_ARRAY_V4B8_CLAMP 4390 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; 4391defm SUST_B_1D_ARRAY_V4B16_CLAMP 4392 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; 4393defm SUST_B_1D_ARRAY_V4B32_CLAMP 4394 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; 4395 4396defm SUST_B_1D_ARRAY_V4B8_TRAP 4397 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; 4398defm SUST_B_1D_ARRAY_V4B16_TRAP 4399 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; 4400defm SUST_B_1D_ARRAY_V4B32_TRAP 4401 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; 4402 4403defm SUST_B_1D_ARRAY_V4B8_ZERO 4404 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; 4405defm SUST_B_1D_ARRAY_V4B16_ZERO 4406 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; 4407defm SUST_B_1D_ARRAY_V4B32_ZERO 4408 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; 4409 4410defm 
SUST_P_1D_ARRAY_V4B8_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B16_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
defm SUST_P_1D_ARRAY_V4B32_TRAP
    : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;

//-----------------------------------
// 2D surface stores
//-----------------------------------

// Single-element 2D store.  `surf` supplies the surface-handle operand $s;
// $x/$y are the coordinates and $r the stored value.  No selection pattern is
// attached here (empty pattern list).
class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
          !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r\\};"),
          []>;
// _R: handle in a 64-bit register; _I: handle as a 64-bit immediate.
multiclass SUST_2D<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_B8_CLAMP  : SUST_2D<"sust.b.2d.b8.clamp",  Int16Regs>;
defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_B8_TRAP  : SUST_2D<"sust.b.2d.b8.trap",  Int16Regs>;
defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;

defm SUST_B_2D_B8_ZERO  : SUST_2D<"sust.b.2d.b8.zero",  Int16Regs>;
defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;

defm SUST_P_2D_B8_TRAP  : SUST_2D<"sust.p.2d.b8.trap",  Int16Regs>;
defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;

// Two-element 2D store: values $r and $g.
class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};"),
          []>;
multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V2B8_CLAMP  : SUST_2D_V2<"sust.b.2d.v2.b8.clamp",  Int16Regs>;
defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_V2B8_TRAP  : SUST_2D_V2<"sust.b.2d.v2.b8.trap",  Int16Regs>;
defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_V2B8_ZERO  : SUST_2D_V2<"sust.b.2d.v2.b8.zero",  Int16Regs>;
defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_V2B8_TRAP  : SUST_2D_V2<"sust.p.2d.v2.b8.trap",  Int16Regs>;
defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;

// Four-element 2D store: values $r, $g, $b and $a.
class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst, " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};"),
          []>;
multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_V4B8_CLAMP  : SUST_2D_V4<"sust.b.2d.v4.b8.clamp",  Int16Regs>;
defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_V4B8_TRAP  : SUST_2D_V4<"sust.b.2d.v4.b8.trap",  Int16Regs>;
defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_V4B8_ZERO  : SUST_2D_V4<"sust.b.2d.v4.b8.zero",  Int16Regs>;
defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_V4B8_TRAP  : SUST_2D_V4<"sust.p.2d.v4.b8.trap",  Int16Regs>;
defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;

//-----------------------------------
// 2D array surface stores
//-----------------------------------

// Single-element store into a 2D surface array; $idx precedes the $x/$y
// coordinates.  The printed coordinate list repeats $y as a fourth entry.
// NOTE(review): presumably padding to the four-element coordinate vector PTX
// expects for a2d -- confirm against the PTX ISA `sust` description.
class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r)),
          !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};"),
          []>;
multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_B8_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_CLAMP
    : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_TRAP
    : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_B8_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B16_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_B32_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_B64_ZERO
    : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_B8_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B16_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_B32_TRAP
    : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;

// Two-element store into a 2D surface array.
class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};"),
          []>;
multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V2B8_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_CLAMP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO
    : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP
    : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;

// Four-element store into a 2D surface array.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst,
                     " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
          []>;
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V4B8_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO
    : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP
    : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;

//-----------------------------------
// 3D surface stores
//-----------------------------------

// Single-element 3D store with coordinates $x/$y/$z.  The printed coordinate
// list repeats $z as a fourth entry.
// NOTE(review): presumably padding to a four-element coordinate vector --
// confirm against the PTX ISA `sust` description.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r)),
          !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
          []>;
multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_B8_CLAMP  : SUST_3D<"sust.b.3d.b8.clamp",  Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP  : SUST_3D<"sust.b.3d.b8.trap",  Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO  : SUST_3D<"sust.b.3d.b8.zero",  Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP  : SUST_3D<"sust.p.3d.b8.trap",  Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;

// Two-element 3D store.
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g)),
          !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
          []>;
multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V2B8_CLAMP  : SUST_3D_V2<"sust.b.3d.v2.b8.clamp",  Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP  : SUST_3D_V2<"sust.b.3d.v2.b8.trap",  Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO  : SUST_3D_V2<"sust.b.3d.v2.b8.zero",  Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP  : SUST_3D_V2<"sust.p.3d.v2.b8.trap",  Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;

// Four-element 3D store.
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<
          (outs),
          !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                          intype:$r, intype:$g, intype:$b, intype:$a)),
          !strconcat(inst,
                     " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
          []>;
multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V4B8_CLAMP  : SUST_3D_V4<"sust.b.3d.v4.b8.clamp",  Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP  : SUST_3D_V4<"sust.b.3d.v4.b8.trap",  Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO  : SUST_3D_V4<"sust.b.3d.v4.b8.zero",  Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP  : SUST_3D_V4<"sust.p.3d.v4.b8.trap",  Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;

}  // closes a scope opened earlier in the file

//-----------------------------------
// Surface store instruction patterns
//-----------------------------------
// The patterns are kept separate from the instruction definitions: per the
// original author's note, placing them inside the definitions made TableGen
// report type errors (the underlying reason was not determined).
// Each pattern maps an int_nvvm_sust_* intrinsic onto the register-handle (_R)
// form of the matching instruction, passing the operands through unchanged.

// .clamp variant
def : Pat<(int_nvvm_sust_b_1d_i8_clamp Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp Int64Regs:$s, Int32Regs:$x,
              Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp Int64Regs:$s, Int32Regs:$x,
              Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
           Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r), 4740 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 4741 4742def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 4743 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4744 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4745 Int16Regs:$r, Int16Regs:$g)>; 4746 4747def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 4748 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4749 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4750 Int16Regs:$r, Int16Regs:$g)>; 4751 4752def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 4753 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4754 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4755 Int32Regs:$r, Int32Regs:$g)>; 4756 4757def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 4758 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4759 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4760 Int64Regs:$r, Int64Regs:$g)>; 4761 4762def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 4763 Int64Regs:$s, Int32Regs:$x, 4764 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4765 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4766 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4767 4768def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 4769 Int64Regs:$s, Int32Regs:$x, 4770 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4771 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4772 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4773 4774def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 4775 Int64Regs:$s, Int32Regs:$x, 4776 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4777 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4778 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4779 4780 4781 4782def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 4783 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 4784 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4785 Int16Regs:$r)>; 4786 4787def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 4788 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 4789 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4790 Int16Regs:$r)>; 4791 4792def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 4793 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 4794 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4795 Int32Regs:$r)>; 4796 4797def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 4798 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 4799 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4800 Int64Regs:$r)>; 4801 4802def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 4803 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4804 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4805 Int16Regs:$r, Int16Regs:$g)>; 4806 4807def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 4808 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4809 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4810 Int16Regs:$r, Int16Regs:$g)>; 4811 4812def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 4813 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4814 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4815 Int32Regs:$r, Int32Regs:$g)>; 4816 4817def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 4818 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4819 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4820 Int64Regs:$r, Int64Regs:$g)>; 4821 4822def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 4823 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4824 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4825 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4826 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4827 4828def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 4829 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4830 Int16Regs:$r, 
Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4831 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4832 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4833 4834def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 4835 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4836 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4837 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 4838 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4839 4840 4841 4842def : Pat<(int_nvvm_sust_b_2d_i8_clamp 4843 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4844 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4845 Int16Regs:$r)>; 4846 4847def : Pat<(int_nvvm_sust_b_2d_i16_clamp 4848 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4849 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4850 Int16Regs:$r)>; 4851 4852def : Pat<(int_nvvm_sust_b_2d_i32_clamp 4853 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4854 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4855 Int32Regs:$r)>; 4856 4857def : Pat<(int_nvvm_sust_b_2d_i64_clamp 4858 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 4859 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4860 Int64Regs:$r)>; 4861 4862def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 4863 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 4864 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4865 Int16Regs:$r, Int16Regs:$g)>; 4866 4867def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 4868 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 4869 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4870 Int16Regs:$r, Int16Regs:$g)>; 4871 4872def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 4873 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 4874 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4875 Int32Regs:$r, 
Int32Regs:$g)>; 4876 4877def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 4878 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 4879 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4880 Int64Regs:$r, Int64Regs:$g)>; 4881 4882def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 4883 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4884 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4885 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4886 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4887 4888def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 4889 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4890 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4891 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4892 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4893 4894def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 4895 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4896 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4897 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 4898 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4899 4900 4901 4902def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 4903 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4904 (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s, 4905 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4906 Int16Regs:$r)>; 4907 4908def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 4909 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4910 (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s, 4911 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4912 Int16Regs:$r)>; 4913 4914def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 4915 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4916 (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s, 4917 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4918 Int32Regs:$r)>; 4919 4920def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 4921 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 
Int64Regs:$r), 4922 (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s, 4923 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4924 Int64Regs:$r)>; 4925 4926def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 4927 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4928 Int16Regs:$r, Int16Regs:$g), 4929 (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4930 Int32Regs:$x, Int32Regs:$y, 4931 Int16Regs:$r, Int16Regs:$g)>; 4932 4933def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 4934 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4935 Int16Regs:$r, Int16Regs:$g), 4936 (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4937 Int32Regs:$x, Int32Regs:$y, 4938 Int16Regs:$r, Int16Regs:$g)>; 4939 4940def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 4941 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4942 Int32Regs:$g), 4943 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4944 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 4945 4946def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 4947 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 4948 Int64Regs:$g), 4949 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4950 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 4951 4952def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 4953 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4954 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4955 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, 4956 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4957 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4958 4959def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 4960 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4961 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4962 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, 4963 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4964 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4965 4966def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 4967 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 4968 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4969 (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 4970 Int32Regs:$x, Int32Regs:$y, 4971 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4972 4973 4974 4975def : Pat<(int_nvvm_sust_b_3d_i8_clamp 4976 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4977 Int16Regs:$r), 4978 (SUST_B_3D_B8_CLAMP_R Int64Regs:$s, 4979 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4980 Int16Regs:$r)>; 4981 4982def : Pat<(int_nvvm_sust_b_3d_i16_clamp 4983 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4984 Int16Regs:$r), 4985 (SUST_B_3D_B16_CLAMP_R Int64Regs:$s, 4986 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4987 Int16Regs:$r)>; 4988 4989def : Pat<(int_nvvm_sust_b_3d_i32_clamp 4990 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4991 Int32Regs:$r), 4992 (SUST_B_3D_B32_CLAMP_R Int64Regs:$s, 4993 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4994 Int32Regs:$r)>; 4995 4996def : Pat<(int_nvvm_sust_b_3d_i64_clamp 4997 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4998 Int64Regs:$r), 4999 (SUST_B_3D_B64_CLAMP_R Int64Regs:$s, 5000 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5001 Int64Regs:$r)>; 5002 5003def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp 5004 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5005 Int16Regs:$r, Int16Regs:$g), 5006 (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s, 5007 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5008 Int16Regs:$r, Int16Regs:$g)>; 5009 5010def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp 5011 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5012 Int16Regs:$r, Int16Regs:$g), 5013 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, 5014 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5015 Int16Regs:$r, Int16Regs:$g)>; 5016 5017def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 5018 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5019 Int32Regs:$r, Int32Regs:$g), 5020 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, 5021 Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$z, 5022 Int32Regs:$r, Int32Regs:$g)>; 5023 5024def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 5025 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5026 Int64Regs:$r, Int64Regs:$g), 5027 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, 5028 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5029 Int64Regs:$r, Int64Regs:$g)>; 5030 5031def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 5032 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5033 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5034 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, 5035 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5036 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5037 5038def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 5039 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5040 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5041 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, 5042 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5043 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5044 5045def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 5046 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5047 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5048 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, 5049 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5050 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5051 5052 5053// .trap variant 5054def : Pat<(int_nvvm_sust_b_1d_i8_trap 5055 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5056 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5057 5058def : Pat<(int_nvvm_sust_b_1d_i16_trap 5059 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5060 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5061 5062def : Pat<(int_nvvm_sust_b_1d_i32_trap 5063 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5064 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5065 5066def : Pat<(int_nvvm_sust_b_1d_i64_trap 5067 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5068 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, 
Int64Regs:$r)>; 5069 5070def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 5071 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5072 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5073 Int16Regs:$r, Int16Regs:$g)>; 5074 5075def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 5076 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5077 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5078 Int16Regs:$r, Int16Regs:$g)>; 5079 5080def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 5081 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5082 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5083 Int32Regs:$r, Int32Regs:$g)>; 5084 5085def : Pat<(int_nvvm_sust_b_1d_v2i64_trap 5086 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5087 (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, 5088 Int64Regs:$r, Int64Regs:$g)>; 5089 5090def : Pat<(int_nvvm_sust_b_1d_v4i8_trap 5091 Int64Regs:$s, Int32Regs:$x, 5092 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5093 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5094 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5095 5096def : Pat<(int_nvvm_sust_b_1d_v4i16_trap 5097 Int64Regs:$s, Int32Regs:$x, 5098 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5099 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5100 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5101 5102def : Pat<(int_nvvm_sust_b_1d_v4i32_trap 5103 Int64Regs:$s, Int32Regs:$x, 5104 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5105 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5106 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5107 5108 5109 5110def : Pat<(int_nvvm_sust_b_1d_array_i8_trap 5111 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5112 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5113 Int16Regs:$r)>; 5114 5115def : Pat<(int_nvvm_sust_b_1d_array_i16_trap 5116 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5117 (SUST_B_1D_ARRAY_B16_TRAP_R 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5118 Int16Regs:$r)>; 5119 5120def : Pat<(int_nvvm_sust_b_1d_array_i32_trap 5121 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5122 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5123 Int32Regs:$r)>; 5124 5125def : Pat<(int_nvvm_sust_b_1d_array_i64_trap 5126 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5127 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5128 Int64Regs:$r)>; 5129 5130def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap 5131 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5132 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5133 Int16Regs:$r, Int16Regs:$g)>; 5134 5135def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 5136 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5137 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5138 Int16Regs:$r, Int16Regs:$g)>; 5139 5140def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 5141 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5142 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5143 Int32Regs:$r, Int32Regs:$g)>; 5144 5145def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 5146 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5147 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5148 Int64Regs:$r, Int64Regs:$g)>; 5149 5150def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 5151 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5152 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5153 (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5154 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5155 5156def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 5157 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5158 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5159 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, 5160 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5161 5162def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 5163 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5164 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5165 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5166 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5167 5168 5169 5170def : Pat<(int_nvvm_sust_b_2d_i8_trap 5171 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5172 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5173 Int16Regs:$r)>; 5174 5175def : Pat<(int_nvvm_sust_b_2d_i16_trap 5176 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5177 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5178 Int16Regs:$r)>; 5179 5180def : Pat<(int_nvvm_sust_b_2d_i32_trap 5181 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5182 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5183 Int32Regs:$r)>; 5184 5185def : Pat<(int_nvvm_sust_b_2d_i64_trap 5186 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5187 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5188 Int64Regs:$r)>; 5189 5190def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 5191 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5192 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5193 Int16Regs:$r, Int16Regs:$g)>; 5194 5195def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 5196 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5197 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5198 Int16Regs:$r, Int16Regs:$g)>; 5199 5200def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 5201 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5202 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5203 Int32Regs:$r, Int32Regs:$g)>; 5204 5205def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 5206 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 
Int64Regs:$g), 5207 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5208 Int64Regs:$r, Int64Regs:$g)>; 5209 5210def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 5211 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5212 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5213 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5215 5216def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 5217 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5218 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5219 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5220 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5221 5222def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 5223 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5224 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5225 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5226 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5227 5228 5229 5230def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 5231 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5232 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 5233 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5234 Int16Regs:$r)>; 5235 5236def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 5237 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5238 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 5239 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5240 Int16Regs:$r)>; 5241 5242def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 5243 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5244 (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 5245 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5246 Int32Regs:$r)>; 5247 5248def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 5249 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5250 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, 5251 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5252 Int64Regs:$r)>; 5253 5254def 
: Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 5255 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5256 Int16Regs:$r, Int16Regs:$g), 5257 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 5258 Int32Regs:$x, Int32Regs:$y, 5259 Int16Regs:$r, Int16Regs:$g)>; 5260 5261def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 5262 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5263 Int16Regs:$r, Int16Regs:$g), 5264 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 5265 Int32Regs:$x, Int32Regs:$y, 5266 Int16Regs:$r, Int16Regs:$g)>; 5267 5268def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 5269 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5270 Int32Regs:$g), 5271 (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5272 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5273 5274def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 5275 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5276 Int64Regs:$g), 5277 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, 5278 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5279 5280def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 5281 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5282 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5283 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 5284 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5285 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5286 5287def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 5288 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5289 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5290 (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 5291 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5292 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5293 5294def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 5295 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5296 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5297 (SUST_B_2D_ARRAY_V4B32_TRAP_R 
Int64Regs:$s, Int32Regs:$l, 5298 Int32Regs:$x, Int32Regs:$y, 5299 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5300 5301 5302 5303def : Pat<(int_nvvm_sust_b_3d_i8_trap 5304 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5305 Int16Regs:$r), 5306 (SUST_B_3D_B8_TRAP_R Int64Regs:$s, 5307 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5308 Int16Regs:$r)>; 5309 5310def : Pat<(int_nvvm_sust_b_3d_i16_trap 5311 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5312 Int16Regs:$r), 5313 (SUST_B_3D_B16_TRAP_R Int64Regs:$s, 5314 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5315 Int16Regs:$r)>; 5316 5317def : Pat<(int_nvvm_sust_b_3d_i32_trap 5318 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5319 Int32Regs:$r), 5320 (SUST_B_3D_B32_TRAP_R Int64Regs:$s, 5321 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5322 Int32Regs:$r)>; 5323 5324def : Pat<(int_nvvm_sust_b_3d_i64_trap 5325 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5326 Int64Regs:$r), 5327 (SUST_B_3D_B64_TRAP_R Int64Regs:$s, 5328 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5329 Int64Regs:$r)>; 5330 5331def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 5332 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5333 Int16Regs:$r, Int16Regs:$g), 5334 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, 5335 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5336 Int16Regs:$r, Int16Regs:$g)>; 5337 5338def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 5339 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5340 Int16Regs:$r, Int16Regs:$g), 5341 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, 5342 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5343 Int16Regs:$r, Int16Regs:$g)>; 5344 5345def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 5346 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5347 Int32Regs:$r, Int32Regs:$g), 5348 (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s, 5349 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5350 Int32Regs:$r, Int32Regs:$g)>; 5351 5352def : Pat<(int_nvvm_sust_b_3d_v2i64_trap 5353 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z, 5354 Int64Regs:$r, Int64Regs:$g), 5355 (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s, 5356 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5357 Int64Regs:$r, Int64Regs:$g)>; 5358 5359def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 5360 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5361 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5362 (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s, 5363 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5364 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5365 5366def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 5367 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5368 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5369 (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s, 5370 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5371 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5372 5373def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 5374 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5375 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5376 (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s, 5377 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5378 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5379 5380 5381// .zero variant 5382def : Pat<(int_nvvm_sust_b_1d_i8_zero 5383 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5384 (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5385 5386def : Pat<(int_nvvm_sust_b_1d_i16_zero 5387 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5388 (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5389 5390def : Pat<(int_nvvm_sust_b_1d_i32_zero 5391 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5392 (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5393 5394def : Pat<(int_nvvm_sust_b_1d_i64_zero 5395 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5396 (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 5397 5398def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 5399 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5400 (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, 
Int32Regs:$x, 5401 Int16Regs:$r, Int16Regs:$g)>; 5402 5403def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 5404 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5405 (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5406 Int16Regs:$r, Int16Regs:$g)>; 5407 5408def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 5409 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5410 (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5411 Int32Regs:$r, Int32Regs:$g)>; 5412 5413def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 5414 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5415 (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, 5416 Int64Regs:$r, Int64Regs:$g)>; 5417 5418def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 5419 Int64Regs:$s, Int32Regs:$x, 5420 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5421 (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, 5422 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5423 5424def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 5425 Int64Regs:$s, Int32Regs:$x, 5426 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5427 (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, 5428 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5429 5430def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 5431 Int64Regs:$s, Int32Regs:$x, 5432 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5433 (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, 5434 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5435 5436 5437 5438def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 5439 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5440 (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5441 Int16Regs:$r)>; 5442 5443def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 5444 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5445 (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5446 Int16Regs:$r)>; 5447 5448def : Pat<(int_nvvm_sust_b_1d_array_i32_zero 5449 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$r), 5450 (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5451 Int32Regs:$r)>; 5452 5453def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 5454 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5455 (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5456 Int64Regs:$r)>; 5457 5458def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 5459 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5460 (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5461 Int16Regs:$r, Int16Regs:$g)>; 5462 5463def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 5464 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5465 (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5466 Int16Regs:$r, Int16Regs:$g)>; 5467 5468def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 5469 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5470 (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5471 Int32Regs:$r, Int32Regs:$g)>; 5472 5473def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 5474 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5475 (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5476 Int64Regs:$r, Int64Regs:$g)>; 5477 5478def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 5479 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5480 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5481 (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5482 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5483 5484def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 5485 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5486 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5487 (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5488 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5489 5490def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 5491 Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, 5492 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5493 (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5494 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5495 5496 5497 5498def : Pat<(int_nvvm_sust_b_2d_i8_zero 5499 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5500 (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5501 Int16Regs:$r)>; 5502 5503def : Pat<(int_nvvm_sust_b_2d_i16_zero 5504 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5505 (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5506 Int16Regs:$r)>; 5507 5508def : Pat<(int_nvvm_sust_b_2d_i32_zero 5509 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5510 (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5511 Int32Regs:$r)>; 5512 5513def : Pat<(int_nvvm_sust_b_2d_i64_zero 5514 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5515 (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5516 Int64Regs:$r)>; 5517 5518def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 5519 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5520 (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5521 Int16Regs:$r, Int16Regs:$g)>; 5522 5523def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 5524 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5525 (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5526 Int16Regs:$r, Int16Regs:$g)>; 5527 5528def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 5529 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5530 (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5531 Int32Regs:$r, Int32Regs:$g)>; 5532 5533def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 5534 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 5535 (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5536 Int64Regs:$r, Int64Regs:$g)>; 5537 5538def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 
5539 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5540 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5541 (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5542 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5543 5544def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 5545 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5546 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5547 (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5548 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5549 5550def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 5551 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5552 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5553 (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5554 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5555 5556 5557 5558def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 5559 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5560 (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s, 5561 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5562 Int16Regs:$r)>; 5563 5564def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 5565 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5566 (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s, 5567 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5568 Int16Regs:$r)>; 5569 5570def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 5571 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5572 (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s, 5573 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5574 Int32Regs:$r)>; 5575 5576def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 5577 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5578 (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s, 5579 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5580 Int64Regs:$r)>; 5581 5582def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 5583 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5584 Int16Regs:$r, Int16Regs:$g), 5585 (SUST_B_2D_ARRAY_V2B8_ZERO_R 
Int64Regs:$s, Int32Regs:$l, 5586 Int32Regs:$x, Int32Regs:$y, 5587 Int16Regs:$r, Int16Regs:$g)>; 5588 5589def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 5590 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5591 Int16Regs:$r, Int16Regs:$g), 5592 (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, 5593 Int32Regs:$x, Int32Regs:$y, 5594 Int16Regs:$r, Int16Regs:$g)>; 5595 5596def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 5597 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5598 Int32Regs:$g), 5599 (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5600 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5601 5602def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 5603 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5604 Int64Regs:$g), 5605 (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, 5606 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5607 5608def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 5609 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5610 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5611 (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s, 5612 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5613 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5614 5615def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 5616 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5617 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5618 (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s, 5619 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5620 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5621 5622def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 5623 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5624 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5625 (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, 5626 Int32Regs:$x, Int32Regs:$y, 5627 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5628 5629 5630 5631def : 
Pat<(int_nvvm_sust_b_3d_i8_zero 5632 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5633 Int16Regs:$r), 5634 (SUST_B_3D_B8_ZERO_R Int64Regs:$s, 5635 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5636 Int16Regs:$r)>; 5637 5638def : Pat<(int_nvvm_sust_b_3d_i16_zero 5639 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5640 Int16Regs:$r), 5641 (SUST_B_3D_B16_ZERO_R Int64Regs:$s, 5642 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5643 Int16Regs:$r)>; 5644 5645def : Pat<(int_nvvm_sust_b_3d_i32_zero 5646 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5647 Int32Regs:$r), 5648 (SUST_B_3D_B32_ZERO_R Int64Regs:$s, 5649 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5650 Int32Regs:$r)>; 5651 5652def : Pat<(int_nvvm_sust_b_3d_i64_zero 5653 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5654 Int64Regs:$r), 5655 (SUST_B_3D_B64_ZERO_R Int64Regs:$s, 5656 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5657 Int64Regs:$r)>; 5658 5659def : Pat<(int_nvvm_sust_b_3d_v2i8_zero 5660 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5661 Int16Regs:$r, Int16Regs:$g), 5662 (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s, 5663 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5664 Int16Regs:$r, Int16Regs:$g)>; 5665 5666def : Pat<(int_nvvm_sust_b_3d_v2i16_zero 5667 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5668 Int16Regs:$r, Int16Regs:$g), 5669 (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s, 5670 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5671 Int16Regs:$r, Int16Regs:$g)>; 5672 5673def : Pat<(int_nvvm_sust_b_3d_v2i32_zero 5674 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5675 Int32Regs:$r, Int32Regs:$g), 5676 (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s, 5677 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5678 Int32Regs:$r, Int32Regs:$g)>; 5679 5680def : Pat<(int_nvvm_sust_b_3d_v2i64_zero 5681 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5682 Int64Regs:$r, Int64Regs:$g), 5683 (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s, 5684 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5685 Int64Regs:$r, 
Int64Regs:$g)>; 5686 5687def : Pat<(int_nvvm_sust_b_3d_v4i8_zero 5688 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5689 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5690 (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s, 5691 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5692 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5693 5694def : Pat<(int_nvvm_sust_b_3d_v4i16_zero 5695 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5696 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5697 (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s, 5698 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5699 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5700 5701def : Pat<(int_nvvm_sust_b_3d_v4i32_zero 5702 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5703 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5704 (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s, 5705 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5706 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5707 5708 5709 5710 5711def : Pat<(int_nvvm_sust_p_1d_i8_trap 5712 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5713 (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5714 5715def : Pat<(int_nvvm_sust_p_1d_i16_trap 5716 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5717 (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 5718 5719def : Pat<(int_nvvm_sust_p_1d_i32_trap 5720 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5721 (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 5722 5723def : Pat<(int_nvvm_sust_p_1d_v2i8_trap 5724 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5725 (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5726 Int16Regs:$r, Int16Regs:$g)>; 5727 5728def : Pat<(int_nvvm_sust_p_1d_v2i16_trap 5729 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5730 (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5731 Int16Regs:$r, Int16Regs:$g)>; 5732 5733def : Pat<(int_nvvm_sust_p_1d_v2i32_trap 5734 Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$r, Int32Regs:$g), 5735 (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5736 Int32Regs:$r, Int32Regs:$g)>; 5737 5738def : Pat<(int_nvvm_sust_p_1d_v4i8_trap 5739 Int64Regs:$s, Int32Regs:$x, 5740 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5741 (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 5742 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5743 5744def : Pat<(int_nvvm_sust_p_1d_v4i16_trap 5745 Int64Regs:$s, Int32Regs:$x, 5746 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5747 (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 5748 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5749 5750def : Pat<(int_nvvm_sust_p_1d_v4i32_trap 5751 Int64Regs:$s, Int32Regs:$x, 5752 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5753 (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 5754 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5755 5756 5757 5758def : Pat<(int_nvvm_sust_p_1d_array_i8_trap 5759 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5760 (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5761 Int16Regs:$r)>; 5762 5763def : Pat<(int_nvvm_sust_p_1d_array_i16_trap 5764 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5765 (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5766 Int16Regs:$r)>; 5767 5768def : Pat<(int_nvvm_sust_p_1d_array_i32_trap 5769 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5770 (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5771 Int32Regs:$r)>; 5772 5773def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap 5774 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5775 (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5776 Int16Regs:$r, Int16Regs:$g)>; 5777 5778def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap 5779 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5780 (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, 5781 Int16Regs:$r, Int16Regs:$g)>; 5782 5783def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap 5784 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5785 (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5786 Int32Regs:$r, Int32Regs:$g)>; 5787 5788def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap 5789 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5790 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5791 (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5792 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5793 5794def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap 5795 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5796 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5797 (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5798 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5799 5800def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap 5801 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5802 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5803 (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5804 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5805 5806 5807 5808def : Pat<(int_nvvm_sust_p_2d_i8_trap 5809 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5810 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5811 Int16Regs:$r)>; 5812 5813def : Pat<(int_nvvm_sust_p_2d_i16_trap 5814 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5815 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5816 Int16Regs:$r)>; 5817 5818def : Pat<(int_nvvm_sust_p_2d_i32_trap 5819 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5820 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5821 Int32Regs:$r)>; 5822 5823def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 5824 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5825 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, 5826 Int16Regs:$r, Int16Regs:$g)>; 5827 5828def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 5829 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5830 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5831 Int16Regs:$r, Int16Regs:$g)>; 5832 5833def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 5834 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5835 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5836 Int32Regs:$r, Int32Regs:$g)>; 5837 5838def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 5839 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5840 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5841 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5842 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5843 5844def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 5845 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5846 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5847 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5848 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5849 5850def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 5851 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5852 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5853 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5854 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5855 5856 5857 5858def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 5859 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5860 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 5861 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5862 Int16Regs:$r)>; 5863 5864def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 5865 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5866 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 5867 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5868 Int16Regs:$r)>; 5869 5870def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 5871 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$r), 5872 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 5873 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5874 Int32Regs:$r)>; 5875 5876def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 5877 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5878 Int16Regs:$r, Int16Regs:$g), 5879 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 5880 Int32Regs:$x, Int32Regs:$y, 5881 Int16Regs:$r, Int16Regs:$g)>; 5882 5883def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 5884 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5885 Int16Regs:$r, Int16Regs:$g), 5886 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 5887 Int32Regs:$x, Int32Regs:$y, 5888 Int16Regs:$r, Int16Regs:$g)>; 5889 5890def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 5891 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5892 Int32Regs:$g), 5893 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5894 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5895 5896def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 5897 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5898 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5899 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 5900 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5901 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5902 5903def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 5904 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5905 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5906 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 5907 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5908 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5909 5910def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 5911 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5912 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5913 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 5914 Int32Regs:$x, Int32Regs:$y, 5915 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5916 
// Selection patterns for the int_nvvm_sust_p_3d_*_trap intrinsics.
// Each pattern matches the intrinsic with its operands in registers and
// rewrites it to the correspondingly named SUST_P_3D_*_TRAP_R instruction,
// forwarding the operands unchanged and in the same order:
//   $s            - 64-bit first operand (Int64Regs)
//   $x, $y, $z    - 32-bit operands (Int32Regs)
//   $r[, $g[, $b, $a]] - one, two or four data operands
// Data operands of the i8 and i16 variants are both carried in Int16Regs;
// the i32 variants use Int32Regs.

def : Pat<(int_nvvm_sust_p_3d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

//-----------------------------------
// Read Special Registers
//-----------------------------------

// Instruction that reads a special register into a 64-bit integer register.
//   regname - register name as written in the assembly, without the leading
//             '%' (the '%' is added by the asm string below).
//   intop   - zero-operand intrinsic that this instruction is selected for.
// Emits "mov.u64 $d, %<regname>;".
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat("mov.u64 \t$d, %", regname, ";"),
              [(set Int64Regs:$d, (intop))]>;

// 32-bit counterpart of PTX_READ_SREG_R64: same parameters, but the result
// is an Int32Regs register and the emitted instruction is
// "mov.u32 $d, %<regname>;".
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat("mov.u32 \t$d, %", regname, ";"),
              [(set Int32Regs:$d, (intop))]>;

// TODO: Add vector versions of the special-register reads.

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X :
    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y :
    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z :
    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W :
    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X :
    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y :
    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z :
    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W :
    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID :
    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID :
    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X :
    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y :
    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z :
    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W :
    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X :
    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y :
    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z :
    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W :
    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID :
    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID :
    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ :
    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE :
    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT :
    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE :
    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT :
    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock is read into a 32-bit register; %clock64 is the only register in
// this group read through the 64-bit class.
def INT_PTX_SREG_CLOCK :
    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

// pm0..pm3 special-register reads.
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't
// handle the constant.
def INT_PTX_SREG_WARPSIZE :
    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
// Template arguments:
//   r  - the WMMA_REGS record whose geom/frag/ptx_elt_type are inherited.
//   op - name of the operation the fragment is used with ("load", "store",
//        "wmma.mma", "mma", "ldmatrix"); only consulted when selecting
//        Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond selects the first arm whose condition holds, so the order of
  // the arms below is significant.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4. This is the only arm for this combination; it must stay
    // ahead of the non-f64 m8n8k4 arm further down.
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // Non-f64 @ m8n8k4.
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // (A second, identical "f64 @ m8n8k4" arm used to live here. It could
    // never be selected because the arm near the top matches first, so it has
    // been dropped.)

    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
// The 'ins' operator is replaced with Intr, and the memory operand classes
// (imem, MEMri64, MEMri) are replaced with ADDRvar, ADDRri64 and ADDRri
// respectively.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record to match (looked up with !cast).
//   _Args - list of input dags; they are concatenated into Args.
// Subclasses override OutOperandList/InOperandList/AsmString (and, for the
// address-space-constrained variants, IntrinsicPattern).
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
//   Frag       - fragment being loaded; supplies outputs and predicates.
//   Layout     - layout string placed in the asm after ".sync".
//   Space      - ".global", ".shared" or "" (anything else selects the
//                generic address-space matcher).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added.
//   SrcOp      - operand class used for the $src address.
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                              [!con((ins SrcOp:$src),
                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  // The trailing MmaCode:$ptx operand is what "${ptx:aligned}" in the asm
  // string refers to; MMA_PAT supplies ptx.version for it.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
//   Frag       - fragment being stored; supplies inputs and predicates.
//   Layout     - layout string placed in the asm after ".sync".
//   Space      - ".global", ".shared" or "" (anything else selects the
//                generic address-space matcher).
//   WithStride - when set, an extra Int32Regs:$ldm operand is added.
//   DstOp      - operand class used for the $dst address.
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants
// One record per (layout, stride, address space, address operand class,
// fragment) combination accepted by NVVM_WMMA_LDST_SUPPORTED. The records are
// anonymous and collected in MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs  = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
// ret is FragA's predicate list, extended with [hasSM80, hasPTX71] when b1op
// is ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
  );
}

// WMMA.MMA
//   FragA/FragB/FragC - input fragments (their Ins form the operand list).
//   FragD             - result fragment (its Outs form the output list).
//   ALayout/BLayout   - layout strings for the A and B fragments.
//   Satfinite         - non-zero appends ".satfinite" to the mnemonic.
//   rnd               - rounding mode; empty string means none is printed.
//   b1op              - suffix printed right after "wmma.mma".
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // For f16 inputs only the D and C element types are printed; every other
  // input type prints all four (D, A, B, C).
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    true: "." # FragD.ptx_elt_type
          # "." # FragA.ptx_elt_type
          # "." # FragB.ptx_elt_type
          # "." # FragC.ptx_elt_type
  );
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Create all wmma.mma variants accepted by NVVM_WMMA_SUPPORTED; the anonymous
// records are collected in WMMAs.
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                              WMMA_REGINFO<op[1], "wmma.mma">,
                              WMMA_REGINFO<op[2], "wmma.mma">,
                              WMMA_REGINFO<op[3], "wmma.mma">,
                              layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
// Same argument meaning as WMMA_MMA, minus the rounding mode. Note that
// ".aligned" is spelled out literally in this asm string instead of going
// through the "${ptx:aligned}" operand modifier, and that b1op is printed
// after the type list rather than after the mnemonic.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Create all mma variants accepted by NVVM_MMA_SUPPORTED; the anonymous
// records are collected in MMAs.
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<ptx_elt_type>
//
//   Frag       - fragment being loaded; supplies outputs and predicates.
//   Transposed - when set, ".trans" is added to the mnemonic.
//   Space      - ".shared" selects the shared address-space matcher; any
//                other value selects the generic one.
//   SrcOp      - operand class used for the $src address.
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
//
// The source pattern is wi.IntrinsicPattern; the result is the instruction
// record applied to wi.Args followed by ptx.version (which fills the trailing
// MmaCode:$ptx operand of the instruction).
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma>;