//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the constant's
// APFloat value to float/double and matches when it compares equal to the
// literal 0.0 or 1.0.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;

// Code snippets that test the address space of a memory node by calling
// ChkMemSDNodeAddressSpace with the corresponding ADDRESS_SPACE_* constant.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  // Rewrites the matched immediate into an i32 immediate that holds
  // Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates list of n sequential register names.
// E.g.
//   RegSeq<3, "r">.ret -> ["r0", "r1", "r2"]
class RegSeq<int n, string prefix> {
  // Recursive definition: the names for n-1 registers followed by the name
  // with index n-1; the empty list when n is 0.
  list<string> ret = !if(n,
                         !listconcat(RegSeq<!sub(n, 1), prefix>.ret,
                                     [prefix # !sub(n, 1)]),
                         []);
}

// Values of the `threadmask_imm` flag that the shfl instantiation loop walks
// over: both 0 and 1 for the sync variants, only 0 otherwise (non-sync shfl
// instructions get no threadmask operand at all, see SHFL_INSTR).
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = true in {

// bar.sync on barrier 0, on a barrier held in a register, and on a barrier
// plus a second register operand.
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                             "bar.sync \t0;",
                             [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                             "bar.sync \t$src1;",
                             [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                            "bar.sync \t$src1, $src2;",
                            [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// Barrier-with-reduction forms. The i32 input is first turned into a
// predicate with setp.ne; the and/or forms convert the predicate result back
// into an i32 with selp.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t"
  # ".reg .pred \t%p1; \n\t"
  # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
  # "bar.red.popc.u32 \t$dst, 0, %p1; \n\t"
  # "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t"
  # ".reg .pred \t%p1; \n\t"
  # ".reg .pred \t%p2; \n\t"
  # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
  # "bar.red.and.pred \t%p2, 0, %p1; \n\t"
  # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
  # "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t"
  # ".reg .pred \t%p1; \n\t"
  # ".reg .pred \t%p2; \n\t"
  # "setp.ne.u32 \t%p1, $pred, 0; \n\t"
  # "bar.red.or.pred \t%p2, 0, %p1; \n\t"
  # "selp.u32 \t$dst, 1, 0, %p2; \n\t"
  # "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync with an immediate (_I) or register (_R) operand.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i),
                                    "bar.warp.sync \t$i;",
                                    [(int_nvvm_bar_warp_sync imm:$i)]>,
                          Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i),
                                    "bar.warp.sync \t$i;",
                                    [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
                          Requires<[hasPTX60, hasSM30]>;

// barrier.sync with an immediate (_I) or register (_R) operand.
def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i),
                                   "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
                         Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i),
                                   "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync Int32Regs:$i)]>,
                         Requires<[hasPTX60, hasSM30]>;

// barrier.sync with an id and a count. The two-letter suffix gives the
// operand kinds in (id, cnt) order: R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX60, hasSM30]>;

// One shfl / shfl.sync instruction variant.
//   sync           - emit the .sync form, which adds a threadmask operand.
//   mode           - shuffle mode string spliced into the intrinsic name and
//                    the asm string.
//   reg            - "i32" or "f32"; selects the data register class.
//   return_pred    - also produce the Int1Regs predicate result ("p"
//                    intrinsics).
//   offset_imm, mask_imm, threadmask_imm
//                  - when set, the corresponding operand is an i32imm instead
//                    of an Int32Regs register.
// The operand lists, asm string and selection pattern are all derived from
// these flags inside the class body.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  // Intrinsic record name, e.g. int_nvvm_shfl_sync_down_i32p.
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  // Inputs, in intrinsic argument order: [threadmask,] src, offset, mask.
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  // Note that the asm string prints the threadmask last, although it is the
  // first input operand.
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Build (set <outs...>, (Intr <ins...>)) from the operand lists: `outs`
  // becomes `set`, `ins` becomes the intrinsic, and i32imm operands are
  // matched with `imm`.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
                    !subst(ins, Intr,
                    !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of sync/mode/type/predicate-return and
// register-vs-immediate operand kinds.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  "vote." # mode # " \t$dest, $pred;",
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    "vote.sync." # mode # " \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    "vote.sync." # mode #" \t$dest, $pred, $mask;",
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.{b32,b64}
// Four variants covering immediate/register forms of $mask and $value:
//   ii: imm mask, imm value      ir: reg mask, imm value
//   ri: imm mask, reg value      rr: reg mask, reg value
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync.{b32,b64} producing both the i32 result and a predicate.
// Variant suffixes follow the same scheme as MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                           Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                          i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                          i64imm>;

// redux.sync.<op>.<type> with register source and register mask.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
        Requires<[hasPTX70, hasSM80]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
// Operand-less instruction whose full asm text is StrOp and which is selected
// for the argument-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Async Copy Functions
//-----------------------------------

// cp.async.mbarrier.arrive[.noinc][.shared].b64; _32/_64 select the register
// class used for the address operand.
multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace,
                                    Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
            "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
            [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
            "cp.async.mbarrier.arrive" # NoInc # AddrSpace # ".b64 [$addr];",
            [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_MBARRIER_ARRIVE :
  CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>;
defm CP_ASYNC_MBARRIER_ARRIVE_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>;
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
  CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;

// cp.async.ca.shared.global with the copy size given as the literal `cpsize`.
multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            "cp.async.ca.shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;

defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;

defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
  CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;

// cp.async.cg.shared.global with the copy size given as the literal `cpsize`.
multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
            "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
            "cp.async.cg.shared.global [$dst], [$src], " # cpsize # ";",
            [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
  CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;

def CP_ASYNC_COMMIT_GROUP :
  NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
  Requires<[hasPTX70, hasSM80]>;

// The group count is matched as a target immediate (timm).
def CP_ASYNC_WAIT_GROUP :
  NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
  [(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
  Requires<[hasPTX70, hasSM80]>;

def CP_ASYNC_WAIT_ALL :
  NVPTXInst<(outs), (ins), "cp.async.wait_all;",
  [(int_nvvm_cp_async_wait_all)]>,
  Requires<[hasPTX70, hasSM80]>;

//-----------------------------------
// MBarrier Functions
//-----------------------------------
// In every multiclass below, AddrSpace is spliced into the mnemonic ("" or
// ".shared") and the _32/_64 defs differ only in the register class of $addr.

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
    [(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;",
    [(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED : MBARRIER_INIT<".shared",
                                          int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
           "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
    [(Intrin Int32Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
           "mbarrier.inval" # AddrSpace # ".b64 [$addr];",
    [(Intrin Int64Regs:$addr)]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED : MBARRIER_INVAL<".shared",
                                            int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];",
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED :
  MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive.noComplete" # AddrSpace
           # ".b64 $state, [$addr], $count;",
    [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive.noComplete" # AddrSpace
           # ".b64 $state, [$addr], $count;",
    [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE :
  MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_NOCOMPLETE<".shared", int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
           "mbarrier.arrive_drop" # AddrSpace
           # ".b64 $state, [$addr];",
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
           "mbarrier.arrive_drop" # AddrSpace
           # ".b64 $state, [$addr];",
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP :
  MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED :
  MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int32Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive_drop.noComplete" # AddrSpace
           # ".b64 $state, [$addr], $count;",
           [(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
           (ins Int64Regs:$addr, Int32Regs:$count),
           "mbarrier.arrive_drop.noComplete" # AddrSpace
           # ".b64 $state, [$addr], $count;",
           [(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED :
  MBARRIER_ARRIVE_DROP_NOCOMPLETE<".shared",
                       int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
           "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;",
           [(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
           "mbarrier.test_wait" # AddrSpace # ".b64 $res, [$addr], $state;",
           [(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;
}

defm MBARRIER_TEST_WAIT :
  MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED :
  MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

// mbarrier.pending_count.b64: i32 result computed from an i64 state value.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
           NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
           "mbarrier.pending_count.b64 $res, $state;",
           [(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
    Requires<[hasPTX70, hasSM80]>;

def MBARRIER_PENDING_COUNT :
  MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is
// NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// f32: fmin(1.0, fmax(0.0, a)) in all four operand orders -> cvt.sat.
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f
            (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64: the same four operand orders.
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d
            (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
// One-input instruction: $dst = IntOP($src0), printed with the complete asm
// string OpcStr and guarded by the optional predicate list Preds.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
  : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
    Requires<Preds>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-input instruction: $dst = IntOP($src0, $src1), printed with the
// complete asm string OpcStr and guarded by the optional predicate list Preds.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                 (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
    Requires<Preds>;

// Three-input instruction: $dst = IntOP($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                 (IntOP s0_regclass:$src0, s1_regclass:$src1,
                        s2_regclass:$src2))]>,
    Requires<Preds>;

//
// MISC
//

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

// f32 min. The .NaN forms require PTX 7.0 / sm_80 and the .xorsign.abs forms
// require PTX 7.2 / sm_86, as expressed by the predicate lists.
def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
  [hasPTX70, hasSM80]>;
def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
  [hasPTX70, hasSM80]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F :
  F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
    [hasPTX72, hasSM86]>;

// f32 max, mirroring the min variants above.
def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
  [hasPTX70, hasSM80]>;
def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
  [hasPTX70, hasSM80]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F :
  F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
    [hasPTX72, hasSM86]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
  F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
    [hasPTX72, hasSM86]>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Min Max f16, f16x2, bf16, bf16x2
//

// Description of one min/max variant: the name suffix, the intrinsic to
// select, the register class used for all operands, and the required
// predicates (PTX 7.0 / sm_80 unless overridden).
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX70, hasSM80]> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_2 per variant. IntName is "min" or "max": it selects the
// fmin/fmax intrinsic of each tuple and is the mnemonic prefix. The def name
// is the variant suffix; the asm suffix is the same string with every "_"
// replaced by ".".
multiclass MIN_MAX<string IntName> {
  foreach P = [
    // f16
    MIN_MAX_TUPLE<"_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16, int_nvvm_fmax_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
      Float16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
      Float16Regs, [hasPTX72, hasSM86]>,
    // f16x2
    MIN_MAX_TUPLE<"_f16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
      Float16x2Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_nan_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
          int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
      Float16x2Regs, [hasPTX72, hasSM86]>,
    // bf16 (operands live in Int16Regs)
    MIN_MAX_TUPLE<"_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
      !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
      Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
      Int16Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16,
          int_nvvm_fmax_nan_xorsign_abs_bf16),
      Int16Regs, [hasPTX72, hasSM86]>,
    // bf16x2 (operands live in Int32Regs)
    MIN_MAX_TUPLE<"_bf16x2",
      !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
      Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX72, hasSM86]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
      !if(!eq(IntName, "min"),
          int_nvvm_fmin_nan_xorsign_abs_bf16x2,
          int_nvvm_fmax_nan_xorsign_abs_bf16x2),
      Int32Regs, [hasPTX72, hasSM86]>
  ] in {
    def P.Variant : F_MATH_2<
      IntName # !subst("_", ".", P.Variant) # " \t$dst, $src0, $src1;",
      P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
  }
}

defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX". The name is kept as-is
// because it prefixes every record this defm generates (INT_NVVM_FMAN_f16,
// ...); confirm nothing refers to those names before renaming.
defm INT_NVVM_FMAN : MIN_MAX<"max">;

//
// Multiplication
//

// High half of the integer product.
def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
  Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
  Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply with an explicit rounding mode (rn/rz/rm/rp), with and
// without .ftz.
def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply with an explicit rounding mode.
def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
      Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide with an explicit rounding mode, with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide with an explicit rounding mode.
def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor  Ceil
//

// floor and ceil are selected as same-type CVT instructions using the
// CvtRMI / CvtRPI conversion modes (the _FTZ modes for the ftz intrinsics).
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
Float32Regs, 847 Float32Regs, int_nvvm_fabs_ftz_f>; 848def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, 849 Float32Regs, int_nvvm_fabs_f>; 850 851def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, 852 Float64Regs, int_nvvm_fabs_d>; 853 854// 855// Abs, Neg bf16, bf16x2 856// 857 858def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs, 859 Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>; 860def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs, 861 Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>; 862def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs, 863 Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>; 864def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs, 865 Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>; 866 867// 868// Round 869// 870 871def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), 872 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 873def : Pat<(int_nvvm_round_f Float32Regs:$a), 874 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; 875def : Pat<(int_nvvm_round_d Float64Regs:$a), 876 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; 877 878// 879// Trunc 880// 881 882def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), 883 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 884def : Pat<(int_nvvm_trunc_f Float32Regs:$a), 885 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; 886def : Pat<(int_nvvm_trunc_d Float64Regs:$a), 887 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; 888 889// 890// Saturate 891// 892 893def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), 894 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; 895def : Pat<(int_nvvm_saturate_f Float32Regs:$a), 896 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 897def : Pat<(int_nvvm_saturate_d Float64Regs:$a), 898 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 899 900// 901// Exp2 Log2 902// 903 904def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", 905 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; 906def 
INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", 907 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; 908def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", 909 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; 910def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", 911 Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>; 912def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", 913 Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>; 914 915def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", 916 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; 917def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", 918 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; 919def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", 920 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; 921 922// 923// Sin Cos 924// 925 926def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", 927 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; 928def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", 929 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; 930 931def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", 932 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; 933def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", 934 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; 935 936// 937// Fma 938// 939 940class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC, 941 list<Predicate> Preds = []> { 942 string Variant = V; 943 Intrinsic Intr = I; 944 NVPTXRegClass RegClass = RC; 945 list<Predicate> Predicates = Preds; 946} 947 948multiclass FMA_INST { 949 foreach P = [ 950 FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>, 951 FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>, 952 FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>, 
953 FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>, 954 955 FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>, 956 FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>, 957 FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>, 958 FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>, 959 FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>, 960 FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>, 961 FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, 962 FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, 963 964 FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX42, hasSM53]>, 965 FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs, 966 [hasPTX42, hasSM53]>, 967 FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs, 968 [hasPTX42, hasSM53]>, 969 FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs, 970 [hasPTX42, hasSM53]>, 971 FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs, 972 [hasPTX70, hasSM80]>, 973 FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, 974 [hasPTX70, hasSM80]>, 975 976 FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, 977 [hasPTX42, hasSM53]>, 978 FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs, 979 [hasPTX42, hasSM53]>, 980 FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs, 981 [hasPTX42, hasSM53]>, 982 FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, 983 Float16x2Regs, [hasPTX42, hasSM53]>, 984 FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs, 985 [hasPTX70, hasSM80]>, 986 FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, 987 Float16x2Regs, [hasPTX70, hasSM80]>, 988 989 FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX70, hasSM80]>, 990 FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs, 991 [hasPTX70, hasSM80]>, 992 993 FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs, 994 [hasPTX70, 
hasSM80]>, 995 FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs, 996 [hasPTX70, hasSM80]> 997 ] in { 998 def P.Variant : 999 F_MATH_3<!strconcat("fma", 1000 !subst("_", ".", P.Variant), " \t$dst, $src0, $src1, $src2;"), 1001 P.RegClass, P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>; 1002 } 1003} 1004 1005defm INT_NVVM_FMA : FMA_INST; 1006 1007// 1008// Rcp 1009// 1010 1011def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", 1012 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>; 1013def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", 1014 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>; 1015def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", 1016 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>; 1017def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;", 1018 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>; 1019def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", 1020 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>; 1021def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", 1022 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>; 1023def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", 1024 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>; 1025def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", 1026 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>; 1027 1028def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, 1029 Float64Regs, int_nvvm_rcp_rn_d>; 1030def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, 1031 Float64Regs, int_nvvm_rcp_rz_d>; 1032def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, 1033 Float64Regs, int_nvvm_rcp_rm_d>; 1034def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, 1035 Float64Regs, int_nvvm_rcp_rp_d>; 1036 1037def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;", 1038 Float32Regs, Float32Regs, 
int_nvvm_rcp_approx_ftz_f>; 1039def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", 1040 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; 1041 1042// 1043// Sqrt 1044// 1045 1046def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", 1047 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>; 1048def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, 1049 Float32Regs, int_nvvm_sqrt_rn_f>; 1050def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", 1051 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>; 1052def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, 1053 Float32Regs, int_nvvm_sqrt_rz_f>; 1054def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", 1055 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>; 1056def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, 1057 Float32Regs, int_nvvm_sqrt_rm_f>; 1058def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", 1059 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>; 1060def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, 1061 Float32Regs, int_nvvm_sqrt_rp_f>; 1062def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", 1063 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>; 1064def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", 1065 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>; 1066 1067def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, 1068 Float64Regs, int_nvvm_sqrt_rn_d>; 1069def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, 1070 Float64Regs, int_nvvm_sqrt_rz_d>; 1071def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, 1072 Float64Regs, int_nvvm_sqrt_rm_d>; 1073def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, 1074 Float64Regs, int_nvvm_sqrt_rp_d>; 1075 1076// nvvm_sqrt 
// intrinsic
// int_nvvm_sqrt_f carries no rounding/FTZ suffix of its own. It is selected
// onto one of four sqrt.f32 records according to the doF32FTZ and
// do_SQRTF32_RN predicates; the last, unpredicated pattern presumably acts as
// the fallback. Keep the patterns in this order.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
  : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs,
    int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add, one record per rounding-mode suffix, with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one record per rounding-mode suffix.
def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// Almost every conversion intrinsic below is a selection pattern onto a
// CVT_<dst>_<src> instruction defined elsewhere; the intrinsic's rounding /
// ftz / relu suffix is expressed through the trailing Cvt* mode operand.

// f64 -> f32
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// f32 -> s32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Two f32 sources -> bf16x2
def : Pat<(int_nvvm_ff2bf16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// Two f32 sources -> f16x2
def : Pat<(int_nvvm_ff2f16x2_rn Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu Float32Regs:$a, Float32Regs:$b),
          (CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu Float32Regs:$a),
          (CVT_bf16_f32 Float32Regs:$a, CvtRZ_RELU)>;

// f32 -> tf32 is a dedicated instruction with a fixed .rna modifier; the
// result is produced in an Int32Regs register.
def CVT_tf32_f32 :
   NVPTXInst<(outs Int32Regs:$dest), (ins Float32Regs:$a),
                   "cvt.rna.tf32.f32 \t$dest, $a;",
       [(set Int32Regs:$dest, (int_nvvm_f2tf32_rna Float32Regs:$a))]>;

// Packs two 32-bit registers into one f64 register with a vector mov.b64.
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
  Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Unpacks an f64 register with a vector mov.b64 and keeps one element: the
// first for d2i_lo, the second for d2i_hi. The other element goes to a scratch
// %temp register declared inside the emitted { } scope.
def INT_NVVM_D2I_LO : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{$dst, %temp}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
  !strconcat("{{\n\t",
             ".reg .b32 %temp; \n\t",
             "mov.b64 \t{%temp, $dst}, $src0;\n\t",
             "}}"),
  Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// f32 -> f16: the CVT result is passed through BITCONVERT_16_F2I before being
// used as the intrinsic's result.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast
//

// Same-width moves between the integer and floating-point register classes.
def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs,
  Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs,
  Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs,
  Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs,
  Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Common shape of every fns.b32 record. `ins` supplies the $mask/$base/$offset
// input operands and `Operands` the intrinsic dag to match; the asm string and
// the hasPTX60/hasSM30 requirement are shared.
class INT_FNS_MBO<dag ins, dag Operands>
  : NVPTXInst<(outs Int32Regs:$dst), ins,
               "fns.b32 \t$dst, $mask, $base, $offset;",
               [(set Int32Regs:$dst, Operands )]>,
    Requires<[hasPTX60, hasSM30]>;

// One record per register/immediate combination of (mask, base, offset); the
// three-letter suffix uses 'r' for a register operand and 'i' for an immediate.
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base,    i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base,       imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns Int32Regs:$mask,       imm:$base, Int32Regs:$offset)>;
def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask,    i32imm:$base,    i32imm:$offset),
                     (int_nvvm_fns Int32Regs:$mask,       imm:$base,       imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                     (int_nvvm_fns       imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<(ins    i32imm:$mask, Int32Regs:$base,    i32imm:$offset),
                     (int_nvvm_fns       imm:$mask, Int32Regs:$base,       imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base, Int32Regs:$offset),
                     (int_nvvm_fns       imm:$mask,       imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$offset),
                     (int_nvvm_fns       imm:$mask,       imm:$base,       imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic fragment.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
 : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic (address, value) for a single pointer register class.
// Emits `reg` (value in a register) and `imm` (value as an immediate); both
// print "atom" # SpaceStr # OpcStr # TypeStr followed by the operand list.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
    !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
  Requires<Pred>;
}
// Instantiates F_ATOMIC_2_imp twice: p32 with Int32Regs addresses and p64 with
// Int64Regs addresses.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
  string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
  list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
    IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted asm opens a { } scope, declares a scratch register `temp`,
// negates $b into it and feeds `temp` to the atom instruction. Here TypeStr is
// spliced in after ".s" / ".u", so it is the bare bit width rather than a
// dotted type suffix. Only a register form is defined.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
  string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
  list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
    !strconcat(
      "{{ \n\t",
      ".reg \t.s", TypeStr, " temp; \n\t",
      "neg.s", TypeStr, " \ttemp, $b; \n\t",
      "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
      "}}"),
    [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
  Requires<Pred>;
}
// p32 / p64 address-width instantiations of F_ATOMIC_2_NEG_imp.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
  string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
   IntOp, Pred> ;
}

// has 3 operands
// Emits the all-register form plus every register/immediate combination of
// the $b and $c operands. Immediate operands are matched with `imm`.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  // $b and $c both in registers.
  def reg : NVPTXInst<
      (outs regclass:$dst),
      (ins ptrclass:$addr, regclass:$b, regclass:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst,
            (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
    Requires<Pred>;

  // $b immediate, $c register.
  def imm1 : NVPTXInst<
      (outs regclass:$dst),
      (ins ptrclass:$addr, IMMType:$b, regclass:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst,
            (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
    Requires<Pred>;

  // $b register, $c immediate.
  def imm2 : NVPTXInst<
      (outs regclass:$dst),
      (ins ptrclass:$addr, regclass:$b, IMMType:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst,
            (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
    Requires<Pred>;

  // $b and $c both immediate.
  def imm3 : NVPTXInst<
      (outs regclass:$dst),
      (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst,
            (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
    Requires<Pred>;
}

// Three-operand atomic for both pointer widths (`p32` / `p64`).
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
                      string OpcStr, PatFrag IntOp, Operand IMMType,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}

// atom_add
//
// Naming used by the fragments below: the `_g`, `_s` and `_gen` suffixes pick
// the ATOMIC_GLOBAL_CHK, ATOMIC_SHARED_CHK and ATOMIC_GENERIC_CHK wrapper.
// NOTE(review): each *_GEN_*_USE_G record reuses the same `_gen` fragment as
// its *_GEN_* sibling and differs only in the ".global" space string; no
// distinguishing predicate is visible here -- confirm how they are selected.

def atomic_load_add_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_add_64 node:$a, node:$b)>;
// Fragments wrapping atomic_load_fadd (used by the F32/F64 records below).
def atomic_load_add_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_fadd node:$a, node:$b)>;

defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".add",
    atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".add",
    atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".add",
    atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".add",
    atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".add",
    atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".add",
    atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".add",
    atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".add",
    atomic_load_add_64_gen, i64imm, imm>;

defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
    Float32Regs, ".global", ".f32", ".add",
    atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
    Float32Regs, ".shared", ".f32", ".add",
    atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
    Float32Regs, "", ".f32", ".add",
    atomic_load_add_gen, f32imm, fpimm>;

defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<
    Float64Regs, ".global", ".f64", ".add",
    atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
    Float64Regs, ".shared", ".f64", ".add",
    atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
    Float64Regs, "", ".f64", ".add",
    atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub
//
// Subtraction goes through F_ATOMIC_2_NEG with the ".add" opcode string, i.e.
// the operand is negated and then atomically added.

def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_sub_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
    Int32Regs, ".global", "32", ".add", atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
    Int64Regs, ".global", "64", ".add", atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
    Int32Regs, "", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
    Int32Regs, ".global", "32", ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
    Int32Regs, ".shared", "32", ".add", atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
    Int64Regs, ".shared", "64", ".add", atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
    Int64Regs, "", "64", ".add", atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<
    Int64Regs, ".global", "64", ".add", atomic_load_sub_64_gen>;

// atom_swap

def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".exch",
    atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".b32", ".exch",
    atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".b32", ".exch",
    atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".exch",
    atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".exch",
    atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".b64", ".exch",
    atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".b64", ".exch",
    atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".exch",
    atomic_swap_64_gen, i64imm, imm>;

// atom_max
//
// Signed fragments wrap atomic_load_max_*, unsigned ones atomic_load_umax_*.
// Every 64-bit record in this section is guarded by [hasSM32].

def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umax_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".max",
    atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".s32", ".max",
    atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".s32", ".max",
    atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".max",
    atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".max",
    atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".s64", ".max",
    atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".s64", ".max",
    atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".max",
    atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".max",
    atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".max",
    atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".max",
    atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".max",
    atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".max",
    atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".max",
    atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".max",
    atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".max",
    atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;

// atom_min
//
// Same layout as atom_max: signed (atomic_load_min_*) and unsigned
// (atomic_load_umin_*) fragments; 64-bit records are guarded by [hasSM32].

def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_umin_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".min",
    atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".s32", ".min",
    atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".s32", ".min",
    atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".s32", ".min",
    atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".min",
    atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".s64", ".min",
    atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".s64", ".min",
    atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".s64", ".min",
    atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".min",
    atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".min",
    atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".min",
    atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".min",
    atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".min",
    atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".u64", ".min",
    atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".u64", ".min",
    atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".u64", ".min",
    atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;

// atom_inc atom_dec
//
// These fragments wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics
// rather than generic atomic_load_* nodes; only 32-bit records are defined.

def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".inc",
    atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".inc",
    atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".inc",
    atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".inc",
    atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".dec",
    atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".u32", ".dec",
    atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".u32", ".dec",
    atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".u32", ".dec",
    atomic_load_dec_32_gen, i32imm, imm>;

// atom_and

def atomic_load_and_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_and_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".and",
    atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".b32", ".and",
    atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".b32", ".and",
    atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".and",
    atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".and",
    atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".b64", ".and",
    atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".b64", ".and",
    atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".and",
    atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;

// atom_or

def atomic_load_or_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_or_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".or",
    atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".b32", ".or",
    atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".or",
    atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".b32", ".or",
    atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".or",
    atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".b64", ".or",
    atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".or",
    atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".b64", ".or",
    atomic_load_or_64_s, i64imm, imm, [hasSM32]>;

// atom_xor

def atomic_load_xor_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b), (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".xor",
    atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<
    Int32Regs, ".shared", ".b32", ".xor",
    atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<
    Int32Regs, "", ".b32", ".xor",
    atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<
    Int32Regs, ".global", ".b32", ".xor",
    atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".xor",
    atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<
    Int64Regs, ".shared", ".b64", ".xor",
    atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<
    Int64Regs, "", ".b64", ".xor",
    atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<
    Int64Regs, ".global", ".b64", ".xor",
    atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;

// atom_cas
//
// Compare-and-swap takes three operands and therefore uses F_ATOMIC_3.

def atomic_cmp_swap_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_s : ATOMIC_SHARED_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>;

defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<
    Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<
    Int32Regs, ".shared", ".b32", ".cas", atomic_cmp_swap_32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<
    Int32Regs, "", ".b32", ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<
    Int32Regs, ".global", ".b32", ".cas", atomic_cmp_swap_32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<
    Int64Regs, ".global", ".b64", ".cas", atomic_cmp_swap_64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<
    Int64Regs, ".shared", ".b64", ".cas", atomic_cmp_swap_64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<
    Int64Regs, "", ".b64", ".cas", atomic_cmp_swap_64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<
    Int64Regs, ".global", ".b64", ".cas", atomic_cmp_swap_64_gen, i64imm>;

// Support for scoped atomic operations.  Matches
// int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and converts it into the appropriate instruction.
// NOTE: not all possible combinations are implemented
//  'space' is limited to generic as it's the only one needed to support CUDA.
//  'scope' = 'gpu' is default and is handled by regular atomic instructions.
//
// ATOM23_impl is the common instruction record: a single `regclass` result
// named $result, the given `ins` dag, and a one-element pattern that sets
// $result from `Operands`.
class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
    : NVPTXInst<(outs regclass:$result), ins, AsmStr,
                [(set regclass:$result, Operands)]>,
      Requires<Preds>;

// Define instruction variants for all addressing modes.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register $b, with a 32-bit and then a 64-bit address register.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  // Both operands in registers: AddedComplexity 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  // Exactly one immediate operand: AddedComplexity 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  // Both operands immediate: no AddedComplexity override.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Asm:       atom[.<space>][.<scope>].<op>.<type> $result, [$src], $b;
  //            (space suffix omitted for "gen", scope suffix for "gpu")
  // Intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>]
  //            (scope suffix omitted when ScopeStr is empty)
  // NOTE(review): the two scope checks differ ("gpu" vs. empty string); the
  // instantiations visible nearby pass only "cta" and "sys", for which both
  // agree -- confirm before passing any other scope.
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // Three-operand counterpart of ATOM2N_impl: same naming scheme, with the
  // asm operand list extended to "$b, $c".
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic ("gen") address space is instantiated.
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic ("gen") address space is instantiated.
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Each scope variant appends hasAtomScope to the caller-supplied predicates.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is default and is currently covered by existing
  // atomics w/o explicitly specified scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics. They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// Scoped atomic instantiations, one per operation string.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// One pattern-less ldu.global record per address operand form. TyStr carries
// the type suffix together with the operand text of the asm string.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def avar : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                       "ldu.global." # TyStr, []>,
             Requires<[hasLDU]>;
  def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                      "ldu.global." # TyStr, []>,
            Requires<[hasLDU]>;
  def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                        "ldu.global." # TyStr, []>,
              Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8
    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
    : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16
  : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2
  : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32
  : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
  : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64
  : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
// Two-result form: each vector element is a separate output register. One
// instruction per address operand form; no selection patterns are attached.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

// Four-result form of the elementized vector ldu.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ldu.global." # TyStr, []>;
}

// v2 instantiations. Note that 8-bit elements use Int16Regs.
defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// v4 instantiations.
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc: one instruction per address operand form. TyStr
// supplies the type suffix together with the operand list of the assembly
// string. None of the variants has a selection pattern; all require hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8    : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16   : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16   : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32   : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64   : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32   : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64   : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
// Two-result form: each vector element is a separate output register. One
// instruction per address operand form; no selection patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// Four-result form of the elementized vector ldg.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src),
                          "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src),
                          "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src),
                          "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// v2 instantiations of the elementized vector ldg.
defm INT_PTX_LDG_G_v2i8_ELE
  : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE
  : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
  : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f16_ELE
  : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDG_G_v2f16x2_ELE
  : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
  : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
  : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE
  : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;

// v4 instantiations of the elementized vector ldg.
defm INT_PTX_LDG_G_v4i8_ELE
  : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE
  : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
  : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Int32Regs>;
defm INT_PTX_LDG_G_v4f16_ELE
  : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16Regs>;
defm INT_PTX_LDG_G_v4f16x2_ELE
  : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
  : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
                  Float32Regs>;


// cvta.<space>: selected for the given intrinsic in three shapes:
//   _yes      32-bit source -> 32-bit result
//   _yes_64   64-bit source -> 64-bit result
//   _yes_6432 32-bit source widened with cvt.u64.u32 before the cvta;
//             only under useShortPtr.
multiclass NG_TO_G<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_6432
    : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvt.u64.u32 \t%tmp, $src;\n\t",
                           " cvta.", Str, ".u64 \t$result, %tmp; }}"),
                [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>,
      Requires<[useShortPtr]>;
}

// cvta.to.<space>: mirror image of NG_TO_G.
//   _yes_3264 performs the 64-bit cvta.to and then narrows the result with
//             cvt.u32.u64; only under useShortPtr.
multiclass G_TO_NG<string Str, Intrinsic Intrin> {
  def _yes
    : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                "cvta.to." # Str # ".u32 \t$result, $src;",
                [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>;
  def _yes_64
    : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                "cvta.to." # Str # ".u64 \t$result, $src;",
                [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>;
  def _yes_3264
    : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src),
                !strconcat("{{ .reg .b64 %tmp;\n\t",
                           " cvta.to.", Str, ".u64 \t%tmp, $src;\n\t",
                           " cvt.u32.u64 \t$result, %tmp; }}"),
                [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>,
      Requires<[useShortPtr]>;
}

defm cvta_local  : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>;
defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>;
defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>;
defm cvta_const  : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>;

defm cvta_to_local  : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>;
defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>;
defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>;
defm cvta_to_const  : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>;


// nvvm.ptr.gen.to.param
// Emitted as a plain register move in both pointer widths.
def nvvm_ptr_gen_to_param
  : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
              "mov.u32 \t$result, $src;",
              [(set Int32Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>;
def nvvm_ptr_gen_to_param_64
  : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
              "mov.u64 \t$result, $src;",
              [(set Int64Regs:$result,
                    (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>;


// nvvm.move intrinsic
// Each int_nvvm_move_* intrinsic becomes a mov of the matching width/type.
def nvvm_move_i16
  : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
              "mov.b16 \t$r, $s;",
              [(set Int16Regs:$r, (int_nvvm_move_i16 Int16Regs:$s))]>;
def nvvm_move_i32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.b32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_i32 Int32Regs:$s))]>;
def nvvm_move_i64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.b64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_i64 Int64Regs:$s))]>;
def nvvm_move_float
  : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
              "mov.f32 \t$r, $s;",
              [(set Float32Regs:$r, (int_nvvm_move_float Float32Regs:$s))]>;
def nvvm_move_double
  : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
              "mov.f64 \t$r, $s;",
              [(set Float64Regs:$r, (int_nvvm_move_double Float64Regs:$s))]>;
def nvvm_move_ptr32
  : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
              "mov.u32 \t$r, $s;",
              [(set Int32Regs:$r, (int_nvvm_move_ptr Int32Regs:$s))]>;
def nvvm_move_ptr64
  : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
              "mov.u64 \t$r, $s;",
              [(set Int64Regs:$r, (int_nvvm_move_ptr Int64Regs:$s))]>;

// @TODO: Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                             (int_nvvm_move_ptr texternalsym:$s))]>;*/


// MoveParam        %r1, param
// ptr_local_to_gen %r2, %r1
// ptr_gen_to_local %r3, %r2
// ->
// mov %r1, param

// @TODO: Revisit this. There is a type
// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym
// instructions are not currently defined. However, we can use the ptr
// variants and the asm printer will do the right thing.
def : Pat<(i64 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr64 texternalsym:$src)>;
def : Pat<(i32 (int_nvvm_ptr_gen_to_local
                 (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))),
          (nvvm_move_ptr32 texternalsym:$src)>;

// Moves an imem operand into a 64-bit register; no selection pattern.
def texsurf_handles
  : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
              "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// The emitted text is only a PTX comment, for either operand width.
def INT_NVVM_COMPILER_WARN_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int32Regs:$a)]>;
def INT_NVVM_COMPILER_WARN_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.warn()",
              [(int_nvvm_compiler_warn Int64Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_32
  : NVPTXInst<(outs), (ins Int32Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int32Regs:$a)]>;
def INT_NVVM_COMPILER_ERROR_64
  : NVPTXInst<(outs), (ins Int64Regs:$a),
              "// llvm.nvvm.compiler.error()",
              [(int_nvvm_compiler_error Int64Regs:$a)]>;


// isspacep
// One predicate-producing instruction per (address space, pointer width).
// Only the const variants carry a predicate (hasPTX31).

def ISSPACEP_CONST_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_CONST_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.const \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
    Requires<[hasPTX31]>;
def ISSPACEP_GLOBAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
def ISSPACEP_GLOBAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.global \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
def ISSPACEP_LOCAL_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
def ISSPACEP_LOCAL_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.local \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
def ISSPACEP_SHARED_32
  : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
def ISSPACEP_SHARED_64
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              "isspacep.shared \t$d, $a;",
              [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;


// Special register reads
def MOV_SPECIAL
  : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
              "mov.b32 \t$d, $r;", []>;

// Every envreg read intrinsic selects MOV_SPECIAL on the matching ENVREGn.
def : Pat<(int_nvvm_read_ptx_sreg_envreg0),  (MOV_SPECIAL ENVREG0)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg1),  (MOV_SPECIAL ENVREG1)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg2),  (MOV_SPECIAL ENVREG2)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg3),  (MOV_SPECIAL ENVREG3)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg4),  (MOV_SPECIAL ENVREG4)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg5),  (MOV_SPECIAL ENVREG5)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg6),  (MOV_SPECIAL ENVREG6)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg7),  (MOV_SPECIAL ENVREG7)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg8),  (MOV_SPECIAL ENVREG8)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg9),  (MOV_SPECIAL ENVREG9)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;


// rotate builtin support

// With hasHWROT32 a 32-bit rotate is a funnel shift of $src with itself.
def ROTATE_B32_HW_IMM
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
    Requires<[hasHWROT32]> ;

def ROTATE_B32_HW_REG
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
              "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
              [(set Int32Regs:$dst,
                    (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
    Requires<[hasHWROT32]> ;

// Under noHWROT32 the same intrinsic selects the *_sw rotate instructions.
def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
          (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
      Requires<[noHWROT32]> ;

def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
          (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]> ;

// Helpers for splitting a 64-bit register into 32-bit halves and packing two
// halves back together. None has a selection pattern; they are referenced
// explicitly from Pat results.
let hasSideEffects = false in {
  // mov.b64 unpacks $src into {lo, hi}; the unwanted half goes to %dummy.
  def GET_LO_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{$dst,%dummy}, $src;\n\t"
                # "}}",
                []> ;

  def GET_HI_INT64
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
                "{{\n\t"
                # ".reg .b32 %dummy;\n\t"
                # "mov.b64 \t{%dummy,$dst}, $src;\n\t"
                # "}}",
                []> ;

  def PACK_TWO_INT32
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
                "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
}

// swap.lo.hi: repack with the high half passed as $lo and the low as $hi.
def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
          (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
                          (GET_LO_INT64 Int64Regs:$src))> ;

// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so
// no side effects.
2602let hasSideEffects = false in { 2603 def SHF_L_WRAP_B32_IMM 2604 : NVPTXInst<(outs Int32Regs:$dst), 2605 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2606 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2607 Requires<[hasHWROT32]>; 2608 2609 def SHF_L_WRAP_B32_REG 2610 : NVPTXInst<(outs Int32Regs:$dst), 2611 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2612 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2613 Requires<[hasHWROT32]>; 2614 2615 def SHF_R_WRAP_B32_IMM 2616 : NVPTXInst<(outs Int32Regs:$dst), 2617 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2618 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2619 Requires<[hasHWROT32]>; 2620 2621 def SHF_R_WRAP_B32_REG 2622 : NVPTXInst<(outs Int32Regs:$dst), 2623 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2624 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2625 Requires<[hasHWROT32]>; 2626} 2627 2628// HW version of rotate 64 2629def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2630 (PACK_TWO_INT32 2631 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2632 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2633 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2634 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2635 Requires<[hasHWROT32]>; 2636 2637def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2638 (PACK_TWO_INT32 2639 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2640 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2641 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2642 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2643 Requires<[hasHWROT32]>; 2644 2645 2646def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2647 (PACK_TWO_INT32 2648 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2649 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2650 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2651 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2652 Requires<[hasHWROT32]>; 2653 2654def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 
2655 (PACK_TWO_INT32 2656 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2657 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 2658 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2659 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2660 Requires<[hasHWROT32]>; 2661 2662// SW version of rotate 64 2663def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2664 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>, 2665 Requires<[noHWROT32]>; 2666def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2667 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2668 Requires<[noHWROT32]>; 2669def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2670 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 2671 Requires<[noHWROT32]>; 2672def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2673 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2674 Requires<[noHWROT32]>; 2675 2676 2677//----------------------------------- 2678// Texture Intrinsics 2679//----------------------------------- 2680 2681// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be 2682// also defined in NVPTXReplaceImageHandles.cpp 2683 2684// texmode_independent 2685let IsTex = true, IsTexModeUnified = false in { 2686// Texture fetch instructions using handles 2687 2688class TEX_1D_base<string inst, NVPTXRegClass outtype, 2689 NVPTXRegClass intype, dag texsamp> 2690 : NVPTXInst<(outs outtype:$r, outtype:$g, 2691 outtype:$b, outtype:$a), 2692 !con(texsamp, (ins intype:$x)), 2693 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2694 []>; 2695 2696multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2697 def _RR : TEX_1D_base<inst, outtype, intype, 2698 (ins Int64Regs:$t, Int64Regs:$s)>; 2699 def _RI : TEX_1D_base<inst, outtype, intype, 2700 (ins Int64Regs:$t, i64imm:$s)>; 2701 def _IR : TEX_1D_base<inst, outtype, intype, 2702 (ins i64imm:$t, Int64Regs:$s)>; 2703 def _II : 
TEX_1D_base<inst, outtype, intype, 2704 (ins i64imm:$t, i64imm:$s)>; 2705} 2706 2707defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>; 2708defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2709defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>; 2710defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2711defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>; 2712defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2713 2714class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 2715 NVPTXRegClass intype, dag texsamp> 2716 : NVPTXInst<(outs outtype:$r, outtype:$g, 2717 outtype:$b, outtype:$a), 2718 !con(texsamp, (ins intype:$x, intype:$lod)), 2719 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;", 2720 []>; 2721 2722multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype, 2723 NVPTXRegClass intype> { 2724 def _RR : TEX_1D_LEVEL_base<inst, outtype, intype, 2725 (ins Int64Regs:$t, Int64Regs:$s)>; 2726 def _RI : TEX_1D_LEVEL_base<inst, outtype, intype, 2727 (ins Int64Regs:$t, i64imm:$s)>; 2728 def _IR : TEX_1D_LEVEL_base<inst, outtype, intype, 2729 (ins i64imm:$t, Int64Regs:$s)>; 2730 def _II : TEX_1D_LEVEL_base<inst, outtype, intype, 2731 (ins i64imm:$t, i64imm:$s)>; 2732} 2733 2734defm TEX_1D_F32_F32_LEVEL : 2735 TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2736defm TEX_1D_S32_F32_LEVEL : 2737 TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2738defm TEX_1D_U32_F32_LEVEL : 2739 TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2740 2741class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype, 2742 NVPTXRegClass intype, dag texsamp> 2743 : NVPTXInst<(outs outtype:$r, outtype:$g, 2744 outtype:$b, outtype:$a), 2745 !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)), 2746 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}]," 2747 " \\{$gradx\\}, 
\\{$grady\\};", 2748 []>; 2749 2750multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype, 2751 NVPTXRegClass intype> { 2752 def _RR : TEX_1D_GRAD_base<inst, outtype, intype, 2753 (ins Int64Regs:$t, Int64Regs:$s)>; 2754 def _RI : TEX_1D_GRAD_base<inst, outtype, intype, 2755 (ins Int64Regs:$t, i64imm:$s)>; 2756 def _IR : TEX_1D_GRAD_base<inst, outtype, intype, 2757 (ins i64imm:$t, Int64Regs:$s)>; 2758 def _II : TEX_1D_GRAD_base<inst, outtype, intype, 2759 (ins i64imm:$t, i64imm:$s)>; 2760} 2761 2762defm TEX_1D_F32_F32_GRAD 2763 : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>; 2764defm TEX_1D_S32_F32_GRAD 2765 : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>; 2766defm TEX_1D_U32_F32_GRAD 2767 : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>; 2768 2769class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 2770 NVPTXRegClass intype, dag texsamp> 2771 : NVPTXInst<(outs outtype:$r, outtype:$g, 2772 outtype:$b, outtype:$a), 2773 !con(texsamp, (ins Int32Regs:$l, intype:$x)), 2774 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];", 2775 []>; 2776 2777multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype, 2778 NVPTXRegClass intype> { 2779 def _RR : TEX_1D_ARRAY_base<inst, outtype, intype, 2780 (ins Int64Regs:$t, Int64Regs:$s)>; 2781 def _RI : TEX_1D_ARRAY_base<inst, outtype, intype, 2782 (ins Int64Regs:$t, i64imm:$s)>; 2783 def _IR : TEX_1D_ARRAY_base<inst, outtype, intype, 2784 (ins i64imm:$t, Int64Regs:$s)>; 2785 def _II : TEX_1D_ARRAY_base<inst, outtype, intype, 2786 (ins i64imm:$t, i64imm:$s)>; 2787} 2788 2789defm TEX_1D_ARRAY_F32_F32 2790 : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2791defm TEX_1D_ARRAY_F32_S32 2792 : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>; 2793defm TEX_1D_ARRAY_S32_S32 2794 : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>; 2795defm TEX_1D_ARRAY_S32_F32 2796 : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2797defm 
TEX_1D_ARRAY_U32_S32 2798 : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>; 2799defm TEX_1D_ARRAY_U32_F32 2800 : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2801 2802class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 2803 NVPTXRegClass intype, dag texsamp> 2804 : NVPTXInst<(outs outtype:$r, outtype:$g, 2805 outtype:$b, outtype:$a), 2806 !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)), 2807 inst # " \t\\{$r, $g, $b, $a\\}," 2808 " [$t, $s, \\{$l, $x\\}], $lod;", 2809 []>; 2810 2811multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 2812 NVPTXRegClass intype> { 2813 def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2814 (ins Int64Regs:$t, Int64Regs:$s)>; 2815 def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2816 (ins Int64Regs:$t, i64imm:$s)>; 2817 def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2818 (ins i64imm:$t, Int64Regs:$s)>; 2819 def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype, 2820 (ins i64imm:$t, i64imm:$s)>; 2821} 2822 2823defm TEX_1D_ARRAY_F32_F32_LEVEL 2824 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2825defm TEX_1D_ARRAY_S32_F32_LEVEL 2826 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2827defm TEX_1D_ARRAY_U32_F32_LEVEL 2828 : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2829 2830class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 2831 NVPTXRegClass intype, dag texsamp> 2832 : NVPTXInst<(outs outtype:$r, outtype:$g, 2833 outtype:$b, outtype:$a), 2834 !con(texsamp, (ins Int32Regs:$l, intype:$x, 2835 intype:$gradx, intype:$grady)), 2836 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}]," 2837 " \\{$gradx\\}, \\{$grady\\};", 2838 []>; 2839 2840multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 2841 NVPTXRegClass intype> { 2842 def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2843 (ins Int64Regs:$t, Int64Regs:$s)>; 2844 def _RI : 
TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2845 (ins Int64Regs:$t, i64imm:$s)>; 2846 def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2847 (ins i64imm:$t, Int64Regs:$s)>; 2848 def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype, 2849 (ins i64imm:$t, i64imm:$s)>; 2850} 2851 2852defm TEX_1D_ARRAY_F32_F32_GRAD 2853 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>; 2854defm TEX_1D_ARRAY_S32_F32_GRAD 2855 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>; 2856defm TEX_1D_ARRAY_U32_F32_GRAD 2857 : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>; 2858 2859class TEX_2D_base<string inst, NVPTXRegClass outtype, 2860 NVPTXRegClass intype, dag texsamp> 2861 : NVPTXInst<(outs outtype:$r, outtype:$g, 2862 outtype:$b, outtype:$a), 2863 !con(texsamp, (ins intype:$x, intype:$y)), 2864 inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];", 2865 []>; 2866 2867multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> { 2868 def _RR : TEX_2D_base<inst, outtype, intype, 2869 (ins Int64Regs:$t, Int64Regs:$s)>; 2870 def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>; 2871 def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>; 2872 def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>; 2873} 2874 2875defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>; 2876defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>; 2877defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>; 2878defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>; 2879defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>; 2880defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>; 2881 2882class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype, 2883 NVPTXRegClass intype, dag texsamp> 2884 : NVPTXInst<(outs outtype:$r, outtype:$g, 2885 
outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y\\}], $lod;",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_S32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_LEVEL :
  TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$x, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// `texsamp` contributes the $t/$s inputs.
class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y,
                                   intype:$gradx0, intype:$gradx1,
                                   intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm.
multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm
TEX_2D_S32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_U32_F32_GRAD :
  TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$l, $x, $y, $y}];
// $y is printed twice, so the coordinate vector has four elements.
// `texsamp` contributes the $t/$s inputs; $l is always Int32Regs.
class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$l, $x, $y, $y\\}];",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_F32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_S32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_S32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_2D_ARRAY_U32_F32
  : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_2D_ARRAY_base plus a trailing $lod operand.
class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                   intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
                []>;

multiclass
TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype> {
  // One def per $t/$s operand kind: R = Int64Regs, I = i64imm
  // (first letter is $t, second is $s).
  def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_LEVEL
  : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$l, $x, $y, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// $y is printed twice in the coordinate vector. `texsamp` contributes the
// $t/$s inputs; $l is always Int32Regs.
class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                   intype:$gradx0, intype:$gradx1,
                                   intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$l, $x, $y, $y\\}],"
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm.
multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype> {
  def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_2D_ARRAY_S32_F32_GRAD
  :
TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_2D_ARRAY_U32_F32_GRAD
  : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$x, $y, $z, $z}];
// $z is printed twice, so the coordinate vector has four elements.
// `texsamp` contributes the $t/$s inputs.
class TEX_3D_base<string inst, NVPTXRegClass outtype,
                  NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y, $z, $z\\}];",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_base<inst, outtype, intype,
                        (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<inst, outtype, intype,
                        (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_3D_base plus a trailing $lod operand.
class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                   intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm.
multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype> {
  def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI :
TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_LEVEL
  : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$x, $y, $z, $z}],
//        {$gradx0, $gradx1, $gradx2, $gradx2},
//        {$grady0, $grady1, $grady2, $grady2};
// The last element of each three-component group is printed twice, so every
// brace group has four elements. `texsamp` contributes the $t/$s inputs.
class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                   intype:$gradx0, intype:$gradx1,
                                   intype:$gradx2, intype:$grady0,
                                   intype:$grady1, intype:$grady2)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y, $z, $z\\}],"
                       " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                       " \\{$grady0, $grady1, $grady2, $grady2\\};",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype> {
  def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_3D_S32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_3D_U32_F32_GRAD
  : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$x, $y, $z, $z}];
class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
                    NVPTXRegClass intype, dag
texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y, $z, $z\\}];",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32
  : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32
  : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32
  : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$x, $y, $z, $z}], $lod;
// $z is printed twice in the coordinate vector. `texsamp` contributes the
// $t/$s inputs.
class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                   intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_S32_F32_LEVEL
  :
TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_U32_F32_LEVEL
  : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, $s, {$l, $x, $y, $z}];
// `texsamp` contributes the $t/$s inputs; $l is always Int32Regs.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                   intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$l, $x, $y, $z\\}];",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32
  : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_CUBE_ARRAY_base plus a trailing $lod operand.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                   intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins
Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
                                      (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
                         Float32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
                         Int32Regs, Float32Regs>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL
  : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
                         Int32Regs, Float32Regs>;

// Base record: four `outtype` results named $v0..$v3, printed as
//   inst {$v0, $v1, $v2, $v3}, [$t, $s, {$x, $y}];
// `texsamp` contributes the $t/$s inputs, which !con places ahead of $x, $y.
class TLD4_2D_base<string inst, NVPTXRegClass outtype,
                   NVPTXRegClass intype, dag texsamp>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
                !con(texsamp, (ins intype:$x, intype:$y)),
                inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
                []>;

// One def per $t/$s operand kind: R = Int64Regs, I = i64imm
// (first letter is $t, second is $s).
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
  def _RR : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, Int64Regs:$s)>;
  def _RI : TLD4_2D_base<inst, outtype, intype,
                         (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<inst, outtype, intype,
                         (ins i64imm:$t, i64imm:$s)>;
}

// The R/G/B/A tag in each record name matches the .r/.g/.b/.a selector in
// its mnemonic.
defm TLD4_R_2D_F32_F32
  : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_G_2D_F32_F32
  : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_B_2D_F32_F32
  : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TLD4_A_2D_F32_F32
  : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;

defm TLD4_R_2D_S32_F32
  : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_S32_F32
  : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_S32_F32
  :
TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_S32_F32
  : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;

defm TLD4_R_2D_U32_F32
  : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_G_2D_U32_F32
  : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_B_2D_U32_F32
  : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
defm TLD4_A_2D_U32_F32
  : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;

}


// texmode_unified
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x}];
// `tex` contributes the $t input, which !con places ahead of $x. Unlike the
// records above, there is no $s operand.
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_F32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_F32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_S32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_S32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_U32_F32
  : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_UNIFIED_1D_base plus a trailing $lod operand.
class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
!con(tex, (ins intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_F32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_LEVEL
  : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x}], {$gradx}, {$grady};
// `tex` contributes the $t input.
class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_F32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_S32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_U32_F32_GRAD
  : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$l, $x}];
// `tex` contributes the $t input; $l is always Int32Regs.
class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_F32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_S32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32
  : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_UNIFIED_1D_ARRAY_base plus a trailing $lod
// operand.
class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
  :
TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$l, $x}], {$gradx}, {$grady};
// `tex` contributes the $t input; $l is always Int32Regs.
class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x,
                               intype:$gradx, intype:$grady)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y}];
// `tex` contributes the $t input, which !con places ahead of $x, $y.
class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_F32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_S32
  :
TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_S32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_S32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_U32_F32
  : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y}], $lod;
// `tex` contributes the $t input.
class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_LEVEL
  : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// `tex` contributes the $t input.
class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y,
                               intype:$gradx0, intype:$gradx1,
                               intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype,
(ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_F32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_S32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_U32_F32_GRAD
  : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$l, $x, $y, $y}];
// $y is printed twice, so the coordinate vector has four elements.
// `tex` contributes the $t input; $l is always Int32Regs.
class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_F32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_S32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32
  : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: same shape as TEX_UNIFIED_2D_ARRAY_base plus a trailing $lod
// operand.
class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                               intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$l, $x, $y, $y\\}], $lod;",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                      NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                           (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
                               Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
                               Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
  : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
                               Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$l, $x, $y, $y}],
//        {$gradx0, $gradx1}, {$grady0, $grady1};
// $y is printed twice in the coordinate vector. `tex` contributes the $t
// input; $l is always Int32Regs.
class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
                               intype:$gradx0, intype:$gradx1,
                               intype:$grady0, intype:$grady1)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
                       " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                     NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                          (ins i64imm:$t)>;
}

defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
                              Float32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
                              Int32Regs, Float32Regs>;
defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
                              Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y, $z, $z}];
class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass
intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_F32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_S32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_S32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
defm TEX_UNIFIED_3D_U32_F32
  : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y, $z, $z}], $lod;
// $z is printed twice in the coordinate vector. `tex` contributes the $t
// input.
class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
                inst # " \t\\{$r, $g, $b, $a\\},"
                       " [$t, \\{$x, $y, $z, $z\\}], $lod;",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_LEVEL
  : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_LEVEL
  :
TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y, $z, $z}],
//        {$gradx0, $gradx1, $gradx2, $gradx2},
//        {$grady0, $grady1, $grady2, $grady2};
// The last element of each three-component group is printed twice, so every
// brace group has four elements. `tex` contributes the $t input.
class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z,
                               intype:$gradx0, intype:$gradx1,
                               intype:$gradx2, intype:$grady0,
                               intype:$grady1, intype:$grady2)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
                       " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                       " \\{$grady0, $grady1, $grady2, $grady2\\};",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
                               NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_3D_F32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_S32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
defm TEX_UNIFIED_3D_U32_F32_GRAD
  : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;

// Base record: four `outtype` results, printed as
//   inst {$r, $g, $b, $a}, [$t, {$x, $y, $z, $z}];
// $z is printed twice in the coordinate vector. `tex` contributes the $t
// input.
class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype, dag tex>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(tex, (ins intype:$x, intype:$y, intype:$z)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
                []>;

// _R takes $t as Int64Regs, _I takes $t as i64imm.
multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
                            NVPTXRegClass intype> {
  def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
  def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_F32_F32
  : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
defm TEX_UNIFIED_CUBE_S32_F32
  : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32",
Int32Regs, Float32Regs>; 3685defm TEX_UNIFIED_CUBE_U32_F32 3686 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>; 3687 3688class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 3689 NVPTXRegClass intype, dag tex> 3690 : NVPTXInst<(outs outtype:$r, outtype:$g, 3691 outtype:$b, outtype:$a), 3692 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 3693 inst # " \t\\{$r, $g, $b, $a\\}," 3694 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 3695 []>; 3696multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 3697 NVPTXRegClass intype> { 3698 def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3699 (ins Int64Regs:$t)>; 3700 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 3701 (ins i64imm:$t)>; 3702} 3703 3704defm TEX_UNIFIED_CUBE_F32_F32_LEVEL 3705 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", 3706 Float32Regs, Float32Regs>; 3707defm TEX_UNIFIED_CUBE_S32_F32_LEVEL 3708 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", 3709 Int32Regs, Float32Regs>; 3710defm TEX_UNIFIED_CUBE_U32_F32_LEVEL 3711 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", 3712 Int32Regs, Float32Regs>; 3713 3714class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, 3715 NVPTXRegClass intype, dag tex> 3716 : NVPTXInst<(outs outtype:$r, outtype:$g, 3717 outtype:$b, outtype:$a), 3718 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), 3719 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", 3720 []>; 3721multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, 3722 NVPTXRegClass intype> { 3723 def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3724 (ins Int64Regs:$t)>; 3725 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 3726 (ins i64imm:$t)>; 3727} 3728 3729defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 3730 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>; 3731defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 3732 : 
TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>; 3733defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 3734 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>; 3735 3736class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3737 NVPTXRegClass intype, dag tex> 3738 : NVPTXInst<(outs outtype:$r, outtype:$g, 3739 outtype:$b, outtype:$a), 3740 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, 3741 intype:$lod)), 3742 inst # " \t\\{$r, $g, $b, $a\\}," 3743 " [$t, \\{$l, $x, $y, $z\\}], $lod;", 3744 []>; 3745multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3746 NVPTXRegClass intype> { 3747 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3748 (ins Int64Regs:$t)>; 3749 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 3750 (ins i64imm:$t)>; 3751} 3752 3753defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3754 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 3755 Float32Regs, Float32Regs>; 3756defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3757 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 3758 Int32Regs, Float32Regs>; 3759defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3760 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 3761 Int32Regs, Float32Regs>; 3762 3763class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 3764 NVPTXRegClass intype, dag tex> 3765 : NVPTXInst<(outs outtype:$v0, outtype:$v1, 3766 outtype:$v2, outtype:$v3), 3767 !con(tex, (ins intype:$x, intype:$y)), 3768 inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];", 3769 []>; 3770multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3771 NVPTXRegClass intype> { 3772 def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>; 3773 def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3774} 3775 3776defm TLD4_UNIFIED_R_2D_F32_F32 3777 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3778defm 
TLD4_UNIFIED_G_2D_F32_F32 3779 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3780defm TLD4_UNIFIED_B_2D_F32_F32 3781 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3782defm TLD4_UNIFIED_A_2D_F32_F32 3783 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>; 3784 3785defm TLD4_UNIFIED_R_2D_S32_F32 3786 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3787defm TLD4_UNIFIED_G_2D_S32_F32 3788 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3789defm TLD4_UNIFIED_B_2D_S32_F32 3790 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3791defm TLD4_UNIFIED_A_2D_S32_F32 3792 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>; 3793 3794defm TLD4_UNIFIED_R_2D_U32_F32 3795 : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3796defm TLD4_UNIFIED_G_2D_U32_F32 3797 : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3798defm TLD4_UNIFIED_B_2D_U32_F32 3799 : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3800defm TLD4_UNIFIED_A_2D_U32_F32 3801 : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>; 3802 3803} 3804 3805 3806 3807//=== Surface load instructions 3808 3809let IsSuld = true in { 3810 3811class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf> 3812 : NVPTXInst<(outs outtype:$r), 3813 !con(surf, (ins Int32Regs:$x)), 3814 inst # " \\{$r\\}, [$s, \\{$x\\}];", 3815 []>; 3816multiclass SULD_1D<string inst, NVPTXRegClass outtype> { 3817 def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>; 3818 def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>; 3819} 3820 3821defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>; 3822defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>; 3823defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>; 3824defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>; 3825 3826defm SULD_1D_I8_TRAP : 
SULD_1D<"suld.b.1d.b8.trap", Int16Regs>; 3827defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>; 3828defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>; 3829defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>; 3830 3831defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>; 3832defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>; 3833defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>; 3834defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>; 3835 3836class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> 3837 : NVPTXInst<(outs outtype:$r), 3838 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), 3839 inst # " \\{$r\\}, [$s, \\{$l, $x\\}];", 3840 []>; 3841multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> { 3842 def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; 3843 def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; 3844} 3845 3846defm SULD_1D_ARRAY_I8_CLAMP 3847 : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>; 3848defm SULD_1D_ARRAY_I16_CLAMP 3849 : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>; 3850defm SULD_1D_ARRAY_I32_CLAMP 3851 : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>; 3852defm SULD_1D_ARRAY_I64_CLAMP 3853 : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>; 3854 3855defm SULD_1D_ARRAY_I8_TRAP 3856 : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>; 3857defm SULD_1D_ARRAY_I16_TRAP 3858 : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>; 3859defm SULD_1D_ARRAY_I32_TRAP 3860 : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>; 3861defm SULD_1D_ARRAY_I64_TRAP 3862 : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>; 3863 3864defm SULD_1D_ARRAY_I8_ZERO 3865 : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>; 3866defm SULD_1D_ARRAY_I16_ZERO 3867 : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>; 3868defm SULD_1D_ARRAY_I32_ZERO 3869 : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>; 3870defm SULD_1D_ARRAY_I64_ZERO 3871 : 
SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>; 3872 3873class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf> 3874 : NVPTXInst<(outs outtype:$r), 3875 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), 3876 inst # " \\{$r\\}, [$s, \\{$x, $y\\}];", 3877 []>; 3878multiclass SULD_2D<string inst, NVPTXRegClass outtype> { 3879 def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>; 3880 def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>; 3881} 3882 3883defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>; 3884defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>; 3885defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>; 3886defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>; 3887 3888defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>; 3889defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>; 3890defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>; 3891defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>; 3892 3893defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>; 3894defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>; 3895defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>; 3896defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>; 3897 3898class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf> 3899 : NVPTXInst<(outs outtype:$r), 3900 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), 3901 inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3902 []>; 3903multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> { 3904 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>; 3905 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; 3906} 3907 3908defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; 3909defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; 3910defm SULD_2D_ARRAY_I32_CLAMP : 
SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; 3911defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; 3912 3913defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; 3914defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; 3915defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; 3916defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; 3917 3918defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; 3919defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; 3920defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; 3921defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; 3922 3923class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf> 3924 : NVPTXInst<(outs outtype:$r), 3925 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), 3926 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3927 []>; 3928multiclass SULD_3D<string inst, NVPTXRegClass outtype> { 3929 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>; 3930 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>; 3931} 3932 3933defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; 3934defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>; 3935defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>; 3936defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>; 3937 3938defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>; 3939defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>; 3940defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>; 3941defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>; 3942 3943defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>; 3944defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>; 3945defm SULD_3D_I32_ZERO : 
SULD_3D<"suld.b.3d.b32.zero", Int32Regs>; 3946defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>; 3947} 3948 3949let IsSuld = 2 in { 3950 3951class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3952 : NVPTXInst<(outs outtype:$r, outtype:$g), 3953 !con(surf, (ins Int32Regs:$x)), 3954 inst # " \\{$r, $g\\}, [$s, \\{$x\\}];", 3955 []>; 3956multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> { 3957 def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3958 def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>; 3959} 3960 3961defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>; 3962defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>; 3963defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>; 3964defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>; 3965 3966defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>; 3967defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>; 3968defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>; 3969defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>; 3970 3971defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>; 3972defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>; 3973defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>; 3974defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>; 3975 3976class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 3977 : NVPTXInst<(outs outtype:$r, outtype:$g), 3978 !con(surf, (ins Int32Regs:$l, Int32Regs:$x)), 3979 inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3980 []>; 3981multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 3982 def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 3983 def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 3984} 3985 3986defm 
SULD_1D_ARRAY_V2I8_CLAMP 3987 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>; 3988defm SULD_1D_ARRAY_V2I16_CLAMP 3989 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>; 3990defm SULD_1D_ARRAY_V2I32_CLAMP 3991 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>; 3992defm SULD_1D_ARRAY_V2I64_CLAMP 3993 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>; 3994 3995defm SULD_1D_ARRAY_V2I8_TRAP 3996 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>; 3997defm SULD_1D_ARRAY_V2I16_TRAP 3998 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>; 3999defm SULD_1D_ARRAY_V2I32_TRAP 4000 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>; 4001defm SULD_1D_ARRAY_V2I64_TRAP 4002 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>; 4003 4004defm SULD_1D_ARRAY_V2I8_ZERO 4005 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>; 4006defm SULD_1D_ARRAY_V2I16_ZERO 4007 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>; 4008defm SULD_1D_ARRAY_V2I32_ZERO 4009 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>; 4010defm SULD_1D_ARRAY_V2I64_ZERO 4011 : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>; 4012 4013class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf> 4014 : NVPTXInst<(outs outtype:$r, outtype:$g), 4015 !con(surf, (ins Int32Regs:$x, Int32Regs:$y)), 4016 inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4017 []>; 4018multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> { 4019 def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>; 4020 def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>; 4021} 4022 4023defm SULD_2D_V2I8_CLAMP 4024 : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>; 4025defm SULD_2D_V2I16_CLAMP 4026 : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>; 4027defm SULD_2D_V2I32_CLAMP 4028 : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>; 4029defm SULD_2D_V2I64_CLAMP 4030 : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>; 4031 4032defm SULD_2D_V2I8_TRAP 4033 : 
SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>; 4034defm SULD_2D_V2I16_TRAP 4035 : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>; 4036defm SULD_2D_V2I32_TRAP 4037 : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>; 4038defm SULD_2D_V2I64_TRAP 4039 : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>; 4040 4041defm SULD_2D_V2I8_ZERO 4042 : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>; 4043defm SULD_2D_V2I16_ZERO 4044 : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>; 4045defm SULD_2D_V2I32_ZERO 4046 : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>; 4047defm SULD_2D_V2I64_ZERO 4048 : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>; 4049 4050class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf> 4051 : NVPTXInst<(outs outtype:$r, outtype:$g), 4052 !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)), 4053 inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];", 4054 []>; 4055multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> { 4056 def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>; 4057 def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>; 4058} 4059 4060defm SULD_2D_ARRAY_V2I8_CLAMP 4061 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>; 4062defm SULD_2D_ARRAY_V2I16_CLAMP 4063 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>; 4064defm SULD_2D_ARRAY_V2I32_CLAMP 4065 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>; 4066defm SULD_2D_ARRAY_V2I64_CLAMP 4067 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>; 4068 4069defm SULD_2D_ARRAY_V2I8_TRAP 4070 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>; 4071defm SULD_2D_ARRAY_V2I16_TRAP 4072 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>; 4073defm SULD_2D_ARRAY_V2I32_TRAP 4074 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>; 4075defm SULD_2D_ARRAY_V2I64_TRAP 4076 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>; 4077 4078defm SULD_2D_ARRAY_V2I8_ZERO 4079 : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>; 
defm SULD_2D_ARRAY_V2I16_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
    : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// 3D .v2 surface load: two results, addressed by ($x, $y, $z) with $z
// repeated to fill the fourth slot of the coordinate vector.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP  : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP  : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO  : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

}

// Four-result (.v4) surface loads; these records carry IsSuld = 3.
let IsSuld = 3 in {

// 1D .v4 surface load addressed by $x.
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                []>;
// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V4I8_CLAMP  : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP  : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO  : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// 1D-array .v4 surface load: four results, addressed by ($l, $x).
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                []>;

// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
    : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// 2D .v4 surface load: four results, addressed by ($x, $y).
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                []>;

// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V4I8_CLAMP  : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP  : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO  : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// 2D-array .v4 surface load: four results, addressed by ($l, $x, $y) with $y
// repeated to fill the fourth slot of the coordinate vector.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
                []>;

// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
    : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// 3D .v4 surface load: four results, addressed by ($x, $y, $z) with $z
// repeated to fill the fourth slot of the coordinate vector.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
                []>;

// _R: handle $s in a 64-bit register; _I: handle $s as a 64-bit immediate.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V4I8_CLAMP  : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP  : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO  : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

}

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query writes a 32-bit result $d for the handle $a.  The _R form takes
// the handle in a 64-bit register, the _I form as a 64-bit immediate.  None
// of these records carries a selection pattern of its own.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_order.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.channel_data_type.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.width.b32 \t$d, [$a];", []>;
def TXQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.width.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.height.b32 \t$d, [$a];", []>;
def TXQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.height.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.depth.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.array_size.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_SAMPLES_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_samples.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
def TXQ_NUM_MIPMAP_LEVELS_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "txq.num_mipmap_levels.b32 \t$d, [$a];", []>;
}

// Select each texture-query intrinsic to the register form of its
// instruction.
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
          (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
          (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
          (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
          (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
          (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
          (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Same layout as the texture queries above: 32-bit result $d, handle $a in a
// 64-bit register (_R) or as a 64-bit immediate (_I), no inline pattern.
let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_ORDER_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_order.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_CHANNEL_DATA_TYPE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.channel_data_type.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.width.b32 \t$d, [$a];", []>;
def SUQ_WIDTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.width.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.height.b32 \t$d, [$a];", []>;
def SUQ_HEIGHT_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.height.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_DEPTH_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.depth.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_R
    : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                "suq.array_size.b32 \t$d, [$a];", []>;
def SUQ_ARRAY_SIZE_I
    : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
                "suq.array_size.b32 \t$d, [$a];", []>;
}

// Select each surface-query intrinsic to the register form of its
// instruction.
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
          (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
          (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
          (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
          (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
          (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
          (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
// Each instruction writes a predicate (Int1Regs) result for a 64-bit handle
// and, unlike the queries above, carries its selection pattern inline.
def ISTYPEP_SAMPLER
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.samplerref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_sampler Int64Regs:$a))]>;
def ISTYPEP_SURFACE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.surfref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_surface Int64Regs:$a))]>;
def ISTYPEP_TEXTURE
    : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
                "istypep.texref \t$d, $a;",
                [(set Int1Regs:$d,
                      (int_nvvm_istypep_texture Int64Regs:$a))]>;

//===- Surface Stores -----------------------------------------------------===//

// Surface stores have no outputs.  For every shape below, _R takes the
// surface handle $s in a 64-bit register and _I takes it as a 64-bit
// immediate; b8 and b16 stores both take their data in Int16Regs.
let IsSust = true in {

// 1D store of a single value $r at $x.
class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r)),
                inst # " \t[$s, \\{$x\\}], \\{$r\\};",
                []>;
multiclass SUST_1D<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_B8_CLAMP  : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;

defm SUST_B_1D_B8_TRAP  : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;

defm SUST_B_1D_B8_ZERO  : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;

defm SUST_P_1D_B8_TRAP  : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;

// 1D store of two values ($r, $g) at $x.
class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
                inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
                []>;
multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_V2B8_CLAMP  : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;

defm SUST_B_1D_V2B8_TRAP  : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;

defm SUST_B_1D_V2B8_ZERO  : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;

defm SUST_P_1D_V2B8_TRAP  : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;

// 1D store of four values ($r, $g, $b, $a) at $x.
class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
    : NVPTXInst<(outs),
                !con(surf,
                     (ins Int32Regs:$x, intype:$r, intype:$g,
                          intype:$b, intype:$a)),
                inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
                []>;
multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_1D_V4B8_CLAMP  : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
4495defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 4496 4497defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 4498defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 4499defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 4500 4501defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 4502defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 4503defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 4504 4505defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 4506defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; 4507defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 4508 4509class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4510 : NVPTXInst<(outs), 4511 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 4512 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4513 []>; 4514multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 4515 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4516 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4517} 4518 4519defm SUST_B_1D_ARRAY_B8_CLAMP 4520 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; 4521defm SUST_B_1D_ARRAY_B16_CLAMP 4522 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; 4523defm SUST_B_1D_ARRAY_B32_CLAMP 4524 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; 4525defm SUST_B_1D_ARRAY_B64_CLAMP 4526 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; 4527 4528defm SUST_B_1D_ARRAY_B8_TRAP 4529 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; 4530defm SUST_B_1D_ARRAY_B16_TRAP 4531 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; 4532defm SUST_B_1D_ARRAY_B32_TRAP 4533 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; 4534defm SUST_B_1D_ARRAY_B64_TRAP 4535 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; 4536 4537defm 
SUST_B_1D_ARRAY_B8_ZERO 4538 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; 4539defm SUST_B_1D_ARRAY_B16_ZERO 4540 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; 4541defm SUST_B_1D_ARRAY_B32_ZERO 4542 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; 4543defm SUST_B_1D_ARRAY_B64_ZERO 4544 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; 4545 4546defm SUST_P_1D_ARRAY_B8_TRAP 4547 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; 4548defm SUST_P_1D_ARRAY_B16_TRAP 4549 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; 4550defm SUST_P_1D_ARRAY_B32_TRAP 4551 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; 4552 4553class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 4554 : NVPTXInst<(outs), 4555 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 4556 intype:$r, intype:$g)), 4557 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4558 []>; 4559multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { 4560 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 4561 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 4562} 4563 4564defm SUST_B_1D_ARRAY_V2B8_CLAMP 4565 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; 4566defm SUST_B_1D_ARRAY_V2B16_CLAMP 4567 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; 4568defm SUST_B_1D_ARRAY_V2B32_CLAMP 4569 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; 4570defm SUST_B_1D_ARRAY_V2B64_CLAMP 4571 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; 4572 4573defm SUST_B_1D_ARRAY_V2B8_TRAP 4574 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; 4575defm SUST_B_1D_ARRAY_V2B16_TRAP 4576 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; 4577defm SUST_B_1D_ARRAY_V2B32_TRAP 4578 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; 4579defm SUST_B_1D_ARRAY_V2B64_TRAP 4580 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; 4581 4582defm SUST_B_1D_ARRAY_V2B8_ZERO 4583 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; 4584defm 
SUST_B_1D_ARRAY_V2B16_ZERO 4585 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; 4586defm SUST_B_1D_ARRAY_V2B32_ZERO 4587 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; 4588defm SUST_B_1D_ARRAY_V2B64_ZERO 4589 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; 4590 4591defm SUST_P_1D_ARRAY_V2B8_TRAP 4592 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; 4593defm SUST_P_1D_ARRAY_V2B16_TRAP 4594 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; 4595defm SUST_P_1D_ARRAY_V2B32_TRAP 4596 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; 4597 4598class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 4599 : NVPTXInst<(outs), 4600 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 4601 intype:$r, intype:$g, intype:$b, intype:$a)), 4602 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", 4603 []>; 4604multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> { 4605 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 4606 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 4607} 4608 4609defm SUST_B_1D_ARRAY_V4B8_CLAMP 4610 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; 4611defm SUST_B_1D_ARRAY_V4B16_CLAMP 4612 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; 4613defm SUST_B_1D_ARRAY_V4B32_CLAMP 4614 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; 4615 4616defm SUST_B_1D_ARRAY_V4B8_TRAP 4617 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; 4618defm SUST_B_1D_ARRAY_V4B16_TRAP 4619 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; 4620defm SUST_B_1D_ARRAY_V4B32_TRAP 4621 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; 4622 4623defm SUST_B_1D_ARRAY_V4B8_ZERO 4624 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; 4625defm SUST_B_1D_ARRAY_V4B16_ZERO 4626 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; 4627defm SUST_B_1D_ARRAY_V4B32_ZERO 4628 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; 4629 4630defm 
SUST_P_1D_ARRAY_V4B8_TRAP 4631 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; 4632defm SUST_P_1D_ARRAY_V4B16_TRAP 4633 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; 4634defm SUST_P_1D_ARRAY_V4B32_TRAP 4635 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; 4636 4637class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf> 4638 : NVPTXInst<(outs), 4639 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), 4640 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", 4641 []>; 4642multiclass SUST_2D<string inst, NVPTXRegClass intype> { 4643 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>; 4644 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>; 4645} 4646 4647defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; 4648defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; 4649defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; 4650defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; 4651 4652defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; 4653defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; 4654defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; 4655defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; 4656 4657defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; 4658defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; 4659defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; 4660defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; 4661 4662defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>; 4663defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; 4664defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; 4665 4666class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4667 : NVPTXInst<(outs), 4668 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 4669 intype:$r, intype:$g)), 4670 inst # " \t[$s, \\{$x, 
$y\\}], \\{$r, $g\\};", 4671 []>; 4672multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> { 4673 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4674 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>; 4675} 4676 4677defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; 4678defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; 4679defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; 4680defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; 4681 4682defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; 4683defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; 4684defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; 4685defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; 4686 4687defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; 4688defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; 4689defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; 4690defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; 4691 4692defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; 4693defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; 4694defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; 4695 4696class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4697 : NVPTXInst<(outs), 4698 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 4699 intype:$r, intype:$g, intype:$b, intype:$a)), 4700 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", 4701 []>; 4702multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> { 4703 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4704 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>; 4705} 4706 4707defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", 
Int16Regs>; 4708defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; 4709defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; 4710 4711defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; 4712defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; 4713defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; 4714 4715defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; 4716defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; 4717defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; 4718 4719defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; 4720defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; 4721defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; 4722 4723class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 4724 : NVPTXInst<(outs), 4725 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4726 intype:$r)), 4727 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4728 []>; 4729multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> { 4730 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 4731 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 4732} 4733 4734defm SUST_B_2D_ARRAY_B8_CLAMP 4735 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; 4736defm SUST_B_2D_ARRAY_B16_CLAMP 4737 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; 4738defm SUST_B_2D_ARRAY_B32_CLAMP 4739 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>; 4740defm SUST_B_2D_ARRAY_B64_CLAMP 4741 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; 4742 4743defm SUST_B_2D_ARRAY_B8_TRAP 4744 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; 4745defm SUST_B_2D_ARRAY_B16_TRAP 4746 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; 4747defm SUST_B_2D_ARRAY_B32_TRAP 4748 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", 
Int32Regs>; 4749defm SUST_B_2D_ARRAY_B64_TRAP 4750 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; 4751 4752defm SUST_B_2D_ARRAY_B8_ZERO 4753 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; 4754defm SUST_B_2D_ARRAY_B16_ZERO 4755 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; 4756defm SUST_B_2D_ARRAY_B32_ZERO 4757 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; 4758defm SUST_B_2D_ARRAY_B64_ZERO 4759 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; 4760 4761defm SUST_P_2D_ARRAY_B8_TRAP 4762 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; 4763defm SUST_P_2D_ARRAY_B16_TRAP 4764 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; 4765defm SUST_P_2D_ARRAY_B32_TRAP 4766 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; 4767 4768class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 4769 : NVPTXInst<(outs), 4770 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4771 intype:$r, intype:$g)), 4772 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", 4773 []>; 4774multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> { 4775 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 4776 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 4777} 4778 4779defm SUST_B_2D_ARRAY_V2B8_CLAMP 4780 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; 4781defm SUST_B_2D_ARRAY_V2B16_CLAMP 4782 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; 4783defm SUST_B_2D_ARRAY_V2B32_CLAMP 4784 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; 4785defm SUST_B_2D_ARRAY_V2B64_CLAMP 4786 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; 4787 4788defm SUST_B_2D_ARRAY_V2B8_TRAP 4789 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>; 4790defm SUST_B_2D_ARRAY_V2B16_TRAP 4791 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>; 4792defm SUST_B_2D_ARRAY_V2B32_TRAP 4793 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>; 4794defm SUST_B_2D_ARRAY_V2B64_TRAP 4795 : 
SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>; 4796 4797defm SUST_B_2D_ARRAY_V2B8_ZERO 4798 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>; 4799defm SUST_B_2D_ARRAY_V2B16_ZERO 4800 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>; 4801defm SUST_B_2D_ARRAY_V2B32_ZERO 4802 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>; 4803defm SUST_B_2D_ARRAY_V2B64_ZERO 4804 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>; 4805 4806defm SUST_P_2D_ARRAY_V2B8_TRAP 4807 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>; 4808defm SUST_P_2D_ARRAY_V2B16_TRAP 4809 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>; 4810defm SUST_P_2D_ARRAY_V2B32_TRAP 4811 : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>; 4812 4813class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 4814 : NVPTXInst<(outs), 4815 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4816 intype:$r, intype:$g, intype:$b, intype:$a)), 4817 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};", 4818 []>; 4819multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> { 4820 def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 4821 def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 4822} 4823 4824defm SUST_B_2D_ARRAY_V4B8_CLAMP 4825 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>; 4826defm SUST_B_2D_ARRAY_V4B16_CLAMP 4827 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>; 4828defm SUST_B_2D_ARRAY_V4B32_CLAMP 4829 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>; 4830 4831defm SUST_B_2D_ARRAY_V4B8_TRAP 4832 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>; 4833defm SUST_B_2D_ARRAY_V4B16_TRAP 4834 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>; 4835defm SUST_B_2D_ARRAY_V4B32_TRAP 4836 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>; 4837 4838defm SUST_B_2D_ARRAY_V4B8_ZERO 4839 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>; 4840defm 
SUST_B_2D_ARRAY_V4B16_ZERO 4841 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>; 4842defm SUST_B_2D_ARRAY_V4B32_ZERO 4843 : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>; 4844 4845defm SUST_P_2D_ARRAY_V4B8_TRAP 4846 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>; 4847defm SUST_P_2D_ARRAY_V4B16_TRAP 4848 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>; 4849defm SUST_P_2D_ARRAY_V4B32_TRAP 4850 : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>; 4851 4852class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf> 4853 : NVPTXInst<(outs), 4854 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4855 intype:$r)), 4856 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4857 []>; 4858multiclass SUST_3D<string inst, NVPTXRegClass intype> { 4859 def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>; 4860 def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>; 4861} 4862 4863defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>; 4864defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>; 4865defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>; 4866defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>; 4867 4868defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>; 4869defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>; 4870defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>; 4871defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>; 4872 4873defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>; 4874defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>; 4875defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>; 4876defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>; 4877 4878defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>; 4879defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>; 4880defm SUST_P_3D_B32_TRAP : 
SUST_3D<"sust.p.3d.b32.trap", Int32Regs>; 4881 4882class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf> 4883 : NVPTXInst<(outs), 4884 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4885 intype:$r, intype:$g)), 4886 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};", 4887 []>; 4888multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> { 4889 def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>; 4890 def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>; 4891} 4892 4893defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>; 4894defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>; 4895defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>; 4896defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>; 4897 4898defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>; 4899defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>; 4900defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>; 4901defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>; 4902 4903defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>; 4904defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>; 4905defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>; 4906defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>; 4907 4908defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>; 4909defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>; 4910defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>; 4911 4912class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf> 4913 : NVPTXInst<(outs), 4914 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4915 intype:$r, intype:$g, intype:$b, intype:$a)), 4916 inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, 
$a\\};", 4917 []>; 4918multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> { 4919 def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>; 4920 def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>; 4921} 4922 4923defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>; 4924defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>; 4925defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>; 4926 4927defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>; 4928defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>; 4929defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>; 4930 4931defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>; 4932defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>; 4933defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>; 4934 4935defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>; 4936defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>; 4937defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>; 4938 4939} 4940 4941// Surface store instruction patterns 4942// I'm not sure why we can't just include these in the instruction definitions, 4943// but TableGen complains of type errors :( 4944 4945// .clamp variant 4946def : Pat<(int_nvvm_sust_b_1d_i8_clamp 4947 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4948 (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 4949 4950def : Pat<(int_nvvm_sust_b_1d_i16_clamp 4951 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4952 (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 4953 4954def : Pat<(int_nvvm_sust_b_1d_i32_clamp 4955 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 4956 (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 4957 4958def : Pat<(int_nvvm_sust_b_1d_i64_clamp 4959 Int64Regs:$s, Int32Regs:$x, 
Int64Regs:$r), 4960 (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 4961 4962def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 4963 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4964 (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4965 Int16Regs:$r, Int16Regs:$g)>; 4966 4967def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 4968 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4969 (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4970 Int16Regs:$r, Int16Regs:$g)>; 4971 4972def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 4973 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4974 (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4975 Int32Regs:$r, Int32Regs:$g)>; 4976 4977def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 4978 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4979 (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4980 Int64Regs:$r, Int64Regs:$g)>; 4981 4982def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 4983 Int64Regs:$s, Int32Regs:$x, 4984 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4985 (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4986 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4987 4988def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 4989 Int64Regs:$s, Int32Regs:$x, 4990 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4991 (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4992 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 4993 4994def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 4995 Int64Regs:$s, Int32Regs:$x, 4996 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4997 (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, 4998 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 4999 5000 5001 5002def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 5003 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5004 (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5005 Int16Regs:$r)>; 5006 5007def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 5008 
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 5009 (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5010 Int16Regs:$r)>; 5011 5012def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 5013 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 5014 (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5015 Int32Regs:$r)>; 5016 5017def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 5018 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 5019 (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5020 Int64Regs:$r)>; 5021 5022def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 5023 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5024 (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5025 Int16Regs:$r, Int16Regs:$g)>; 5026 5027def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 5028 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5029 (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5030 Int16Regs:$r, Int16Regs:$g)>; 5031 5032def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 5033 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5034 (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5035 Int32Regs:$r, Int32Regs:$g)>; 5036 5037def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 5038 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5039 (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5040 Int64Regs:$r, Int64Regs:$g)>; 5041 5042def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 5043 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5044 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5045 (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5046 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5047 5048def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 5049 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5050 Int16Regs:$r, 
Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5051 (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5052 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5053 5054def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 5055 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5056 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5057 (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 5058 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5059 5060 5061 5062def : Pat<(int_nvvm_sust_b_2d_i8_clamp 5063 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5064 (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5065 Int16Regs:$r)>; 5066 5067def : Pat<(int_nvvm_sust_b_2d_i16_clamp 5068 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5069 (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5070 Int16Regs:$r)>; 5071 5072def : Pat<(int_nvvm_sust_b_2d_i32_clamp 5073 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5074 (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5075 Int32Regs:$r)>; 5076 5077def : Pat<(int_nvvm_sust_b_2d_i64_clamp 5078 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5079 (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5080 Int64Regs:$r)>; 5081 5082def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 5083 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5084 (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5085 Int16Regs:$r, Int16Regs:$g)>; 5086 5087def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 5088 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 5089 (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5090 Int16Regs:$r, Int16Regs:$g)>; 5091 5092def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 5093 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 5094 (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5095 Int32Regs:$r, 
Int32Regs:$g)>; 5096 5097def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 5098 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 5099 (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5100 Int64Regs:$r, Int64Regs:$g)>; 5101 5102def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 5103 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5104 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5105 (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5106 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5107 5108def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 5109 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5110 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5111 (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5112 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5113 5114def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 5115 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5116 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5117 (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 5118 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 5119 5120 5121 5122def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 5123 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5124 (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s, 5125 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5126 Int16Regs:$r)>; 5127 5128def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 5129 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5130 (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s, 5131 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5132 Int16Regs:$r)>; 5133 5134def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 5135 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5136 (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s, 5137 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5138 Int32Regs:$r)>; 5139 5140def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 5141 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 
Int64Regs:$r), 5142 (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s, 5143 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5144 Int64Regs:$r)>; 5145 5146def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 5147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5148 Int16Regs:$r, Int16Regs:$g), 5149 (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5150 Int32Regs:$x, Int32Regs:$y, 5151 Int16Regs:$r, Int16Regs:$g)>; 5152 5153def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 5154 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5155 Int16Regs:$r, Int16Regs:$g), 5156 (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5157 Int32Regs:$x, Int32Regs:$y, 5158 Int16Regs:$r, Int16Regs:$g)>; 5159 5160def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 5161 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5162 Int32Regs:$g), 5163 (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5164 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 5165 5166def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 5167 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5168 Int64Regs:$g), 5169 (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, 5170 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 5171 5172def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 5173 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5174 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5175 (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, 5176 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5177 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5178 5179def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 5180 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5181 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5182 (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, 5183 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 5184 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 5185 5186def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 5187 
Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 3D clamp patterns: each int_nvvm_sust_b_3d_*_clamp intrinsic selects the
// matching SUST_B_3D_*_CLAMP_R instruction; $x/$y/$z and the data operands
// are passed through in the same order.
def : Pat<(int_nvvm_sust_b_3d_i8_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

// Remaining 3D clamp patterns (int_nvvm_sust_b_3d_*_clamp ->
// SUST_B_3D_*_CLAMP_R, operands in the same order).
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .trap variant
// Same layout as above, but int_nvvm_sust_b_*_trap selects SUST_B_*_TRAP_R.
def : Pat<(int_nvvm_sust_b_1d_i8_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_trap Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r)>;

// 1D trap patterns, two- and four-element data: each
// int_nvvm_sust_b_1d_*_trap intrinsic selects the matching
// SUST_B_1D_*_TRAP_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_trap Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_trap Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_trap Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_trap Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 1D array trap patterns: the $l operand precedes $x.
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_TRAP_R
Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

// 1D array trap patterns: each int_nvvm_sust_b_1d_array_*_trap intrinsic
// selects the matching SUST_B_1D_ARRAY_*_TRAP_R instruction with operands in
// the same order.
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

// Last 1D array trap pattern.
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2D trap patterns: each int_nvvm_sust_b_2d_*_trap intrinsic selects the
// matching SUST_B_2D_*_TRAP_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_2d_i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
          (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

// 2D trap patterns, four-element data (int_nvvm_sust_b_2d_v4*_trap ->
// SUST_B_2D_V4B*_TRAP_R, operands in the same order).
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2D array trap patterns: the $l operand precedes $x/$y.
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def
: Pat<(int_nvvm_sust_b_2d_array_v2i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

// 2D array trap patterns, two- and four-element data: each
// int_nvvm_sust_b_2d_array_*_trap intrinsic selects the matching
// SUST_B_2D_ARRAY_*_TRAP_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_TRAP_R
Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 3D trap patterns: each int_nvvm_sust_b_3d_*_trap intrinsic selects the
// matching SUST_B_3D_*_TRAP_R instruction; $x/$y/$z and the data operands are
// passed through in the same order.
def : Pat<(int_nvvm_sust_b_3d_i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
          (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
          (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

// Remaining 3D trap patterns (int_nvvm_sust_b_3d_v4*_trap ->
// SUST_B_3D_V4B*_TRAP_R, operands in the same order).
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .zero variant
// Same layout as above, but int_nvvm_sust_b_*_zero selects SUST_B_*_ZERO_R.
def : Pat<(int_nvvm_sust_b_1d_i8_zero Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_zero Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s,
Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

// 1D zero patterns, two- and four-element data: each
// int_nvvm_sust_b_1d_*_zero intrinsic selects the matching
// SUST_B_1D_*_ZERO_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_1d_v2i16_zero Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_zero Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 1D array zero patterns: the $l operand precedes $x.
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

// 1D array zero patterns: each int_nvvm_sust_b_1d_array_*_zero intrinsic
// selects the matching SUST_B_1D_ARRAY_*_ZERO_R instruction with operands in
// the same order.
def : Pat<(int_nvvm_sust_b_1d_array_i64_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero Int64Regs:$s,
           Int32Regs:$l,
Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2D zero patterns: each int_nvvm_sust_b_2d_*_zero intrinsic selects the
// matching SUST_B_2D_*_ZERO_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_2d_i8_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

// Remaining 2D zero patterns (int_nvvm_sust_b_2d_v4*_zero ->
// SUST_B_2D_V4B*_ZERO_R, operands in the same order).
def : Pat<(int_nvvm_sust_b_2d_v4i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// 2D array zero patterns: the $l operand precedes $x/$y.
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO_R
Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

// 2D array zero patterns, two- and four-element data: each
// int_nvvm_sust_b_2d_array_*_zero intrinsic selects the matching
// SUST_B_2D_ARRAY_*_ZERO_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

def :
Pat<(int_nvvm_sust_b_3d_i8_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

// 3D zero patterns: each int_nvvm_sust_b_3d_*_zero intrinsic selects the
// matching SUST_B_3D_*_ZERO_R instruction; $x/$y/$z and the data operands are
// passed through in the same order.
def : Pat<(int_nvvm_sust_b_3d_i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r),
          (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r),
          (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r,
Int64Regs:$g)>;

// Remaining 3D zero patterns (int_nvvm_sust_b_3d_v4*_zero ->
// SUST_B_3D_V4B*_ZERO_R, operands in the same order).
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// int_nvvm_sust_p_* patterns: each int_nvvm_sust_p_*_trap intrinsic selects
// the matching SUST_P_*_TRAP_R instruction with operands in the same order.
def : Pat<(int_nvvm_sust_p_1d_i8_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap Int64Regs:$s,
           Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

// sust_p 1D trap patterns, four-element data (int_nvvm_sust_p_1d_v4*_trap ->
// SUST_P_1D_V4B*_TRAP_R, operands in the same order).
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s,
           Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

// sust_p 1D array trap patterns: the $l operand precedes $x.
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap Int64Regs:$s,
           Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, 6001 Int16Regs:$r, Int16Regs:$g)>; 6002 6003def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap 6004 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6005 (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6006 Int32Regs:$r, Int32Regs:$g)>; 6007 6008def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap 6009 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6010 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6011 (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6012 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6013 6014def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap 6015 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6016 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6017 (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6018 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6019 6020def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap 6021 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6022 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6023 (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6024 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6025 6026 6027 6028def : Pat<(int_nvvm_sust_p_2d_i8_trap 6029 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6030 (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6031 Int16Regs:$r)>; 6032 6033def : Pat<(int_nvvm_sust_p_2d_i16_trap 6034 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6035 (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6036 Int16Regs:$r)>; 6037 6038def : Pat<(int_nvvm_sust_p_2d_i32_trap 6039 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6040 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6041 Int32Regs:$r)>; 6042 6043def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 6044 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6045 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, 6046 Int16Regs:$r, Int16Regs:$g)>; 6047 6048def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 6049 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6050 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6051 Int16Regs:$r, Int16Regs:$g)>; 6052 6053def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 6054 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6055 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6056 Int32Regs:$r, Int32Regs:$g)>; 6057 6058def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 6059 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6060 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6061 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6062 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6063 6064def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 6065 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6066 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6067 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6068 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6069 6070def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 6071 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6072 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6073 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6074 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6075 6076 6077 6078def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 6079 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6080 (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 6081 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6082 Int16Regs:$r)>; 6083 6084def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 6085 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6086 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 6087 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6088 Int16Regs:$r)>; 6089 6090def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 6091 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$r), 6092 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 6093 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6094 Int32Regs:$r)>; 6095 6096def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 6097 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6098 Int16Regs:$r, Int16Regs:$g), 6099 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 6100 Int32Regs:$x, Int32Regs:$y, 6101 Int16Regs:$r, Int16Regs:$g)>; 6102 6103def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 6104 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6105 Int16Regs:$r, Int16Regs:$g), 6106 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 6107 Int32Regs:$x, Int32Regs:$y, 6108 Int16Regs:$r, Int16Regs:$g)>; 6109 6110def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 6111 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6112 Int32Regs:$g), 6113 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6114 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6115 6116def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 6117 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6118 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6119 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 6120 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6121 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6122 6123def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 6124 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6125 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6126 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 6127 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6128 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6129 6130def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 6131 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6132 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6133 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6134 Int32Regs:$x, Int32Regs:$y, 6135 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6136 


// int_nvvm_sust_p_3d_*_trap -> SUST_P_3D_*_TRAP_R
// Operands are forwarded to the instruction unchanged and in the same order.
// The 8- and 16-bit element variants both use Int16Regs for their data
// operands; 32-bit variants use Int32Regs.

def : Pat<(int_nvvm_sust_p_3d_i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r),
          (SUST_P_3D_B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r),
          (SUST_P_3D_B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g),
          (SUST_P_3D_V2B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g),
          (SUST_P_3D_V2B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B8_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_3D_V4B16_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_3D_V4B32_TRAP_R
              Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
              Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;

//-----------------------------------
// Read Special Registers
//-----------------------------------

// Reads special register %<regname> into a 64-bit register with mov.u64 and
// selects the instruction for the operand-less intrinsic `intop`.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
    : NVPTXInst<(outs Int64Regs:$d), (ins),
                "mov.u64 \t$d, %" # regname # ";",
                [(set Int64Regs:$d, (intop))]>;

// 32-bit counterpart of PTX_READ_SREG_R64 (mov.u32 into Int32Regs).
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
    : NVPTXInst<(outs Int32Regs:$d), (ins),
                "mov.u32 \t$d, %" # regname # ";",
                [(set Int32Regs:$d, (intop))]>;

// TODO Add read vector-version of special registers

// %tid.{x,y,z,w}
def INT_PTX_SREG_TID_X
    : PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y
    : PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z
    : PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W
    : PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

// %ntid.{x,y,z,w}
def INT_PTX_SREG_NTID_X
    : PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y
    : PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z
    : PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W
    : PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

// %laneid, %warpid, %nwarpid
def INT_PTX_SREG_LANEID
    : PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID
    : PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID
    : PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

// %ctaid.{x,y,z,w}
def INT_PTX_SREG_CTAID_X
    : PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y
    : PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z
    : PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W
    : PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

// %nctaid.{x,y,z,w}
def INT_PTX_SREG_NCTAID_X
    : PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y
    : PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z
    : PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W
    : PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

// %smid, %nsmid, %gridid
def INT_PTX_SREG_SMID
    : PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID
    : PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID
    : PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

// %lanemask_{eq,le,lt,ge,gt}
def INT_PTX_SREG_LANEMASK_EQ
    : PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE
    : PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT
    : PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE
    : PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT
    : PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

// %clock (32-bit) and %clock64 (64-bit)
def INT_PTX_SREG_CLOCK
    : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64
    : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
// prefixes the register name with '%', and WARP_SZ is a constant that is
// written without the prefix.
def INT_PTX_SREG_WARPSIZE :
    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instruction -- register
// types and names, constraints, parts of assembly, etc.
//
//   r  - the target-independent fragment description to extend.
//   op - name of the operation the fragment is used with ("mma", "wmma.mma",
//        "load", "store", "ldmatrix"); only consulted when choosing Predicates.
class WMMA_REGINFO<WMMA_REGS r, string op>
      : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register types used to carry fragment data.
  NVPTXRegClass regclass = !cond(
    !eq(ptx_elt_type, "f16") : Float16x2Regs,
    !eq(ptx_elt_type, "f32") : Float32Regs,
    !eq(ptx_elt_type, "f64") : Float64Regs,
    !eq(ptx_elt_type, "bf16") : Int32Regs,
    !eq(ptx_elt_type, "tf32") : Int32Regs,
    !eq(ptx_elt_type, "s32") : Int32Regs,
    !eq(ptx_elt_type, "b16") : Int32Regs,
    !eq(ptx_elt_type, "s8") : Int32Regs,
    !eq(ptx_elt_type, "u8") : Int32Regs,
    !eq(ptx_elt_type, "s4") : Int32Regs,
    !eq(ptx_elt_type, "u4") : Int32Regs,
    !eq(ptx_elt_type, "b1") : Int32Regs);

  // Instruction input/output arguments for the fragment.
  list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs));

  // List of register names for the fragment -- ["ra0", "ra1",...]
  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;

  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !interleave(reg_names, ", $") # "}}";

  // Predicates for particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used in
  // a given instruction are subject to the same constraints, so an instruction
  // can use predicates from any of its fragments. If/when this is no
  // longer the case, we can concat all per-fragment predicates to enforce that
  // all fragments of the instruction are viable.
  //
  // NOTE: !cond yields the value of the *first* arm whose condition is true,
  // so the order of the arms below matters and an arm that repeats an earlier
  // condition can never be selected.
  list<Predicate> Predicates = !cond(
    // fp16 -> fp16/fp32 @ m16n16k16
    !and(!eq(geom, "m16n16k16"),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

    // f64 @ m8n8k4
    !and(!eq(geom,"m8n8k4"),
         !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],

    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
    !and(!or(!eq(geom, "m8n32k16"),
             !eq(geom, "m32n8k16")),
         !or(!eq(ptx_elt_type, "f16"),
             !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

    // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !or(!eq(ptx_elt_type, "u8"),
             !eq(ptx_elt_type, "s8"),
             !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

    // bf16 @ m16n16k16/m8n32k16/m32n8k16
    !and(!or(!eq(geom,"m16n16k16"),
             !eq(geom,"m8n32k16"),
             !eq(geom,"m32n8k16")),
         !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],

    // tf32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],

    // f32 @ m16n16k8
    !and(!eq(geom,"m16n16k8"),
         !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],

    // b1 -> s32 @ m8n8k128(b1)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],

    // u4/s4 -> s32 @ m8n8k32 (u4/s4)
    !and(!ne(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],

    // m16n8k8/m8n8k16, any element type
    !or(!eq(geom,"m16n8k8"),
        !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],

    // non-f64 @ m8n8k4 (the f64 case is matched by the earlier arm)
    !and(!ne(ptx_elt_type,"f64"),
         !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],

    // mma m8n8k32 requires higher PTX version
    !and(!eq(op,"mma"),
         !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],

    // Remaining mma-only geometries.
    !and(!eq(op,"mma"),
         !or(!eq(geom, "m16n8k16"),
             !eq(geom, "m16n8k4"),
             !eq(geom, "m16n8k32"),
             !eq(geom, "m16n8k64"),
             !eq(geom, "m8n8k128"),
             !eq(geom, "m16n8k128"),
             !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],

    // ldmatrix b16 @ m8n8
    !and(!eq(op,"ldmatrix"),
         !eq(ptx_elt_type,"b16"),
         !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);

  // template DAGs for instruction inputs/output.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Convert dag of arguments into a dag to match given intrinsic.
// The dag operator `ins` is replaced with Intr, and the memory operand
// classes imem/MEMri64/MEMri are replaced with the corresponding address
// patterns ADDRvar/ADDRri64/ADDRri.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Same as above, but uses PatFrag instead of an Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  // Build a dag pattern that matches the intrinsic call.
  dag ret = !foreach(tmp, Ins,
                          !subst(imem, ADDRvar,
                          !subst(MEMri64, ADDRri64,
                          !subst(MEMri, ADDRri,
                          !subst(ins, Intr, tmp)))));
}

// Common WMMA-related fields used for building patterns for all MMA instructions.
//   _Intr - name of the intrinsic record to match.
//   _Args - list of (ins ...) dags that are concatenated into Args.
// The operand lists and asm string given to NVPTXInst here are placeholders
// that every subclass overrides.
class WMMA_INSTR<string _Intr, list<dag> _Args>
  : NVPTXInst<(outs), (ins), "?", []> {
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // Concatenate all arguments into a single dag.
  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
  // Intr is already an Intrinsic, so it is passed as-is.
  dag IntrinsicPattern = BuildPatternI<Intr, Args>.ret;
}

//
// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// More generally the asm string built below is
//   wmma.load.<frag>.sync${ptx:aligned}.<layout>.<geom><space>.<type>
// with <frag>, <geom> and <type> taken from Frag.
//

class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                              [!con((ins SrcOp:$src),
                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
  // Same operands, applied to the intrinsic itself.
  dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             PFOperandsIntr,
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "wmma.load."
                  # Frag.frag
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type # " \t"
                  # Frag.regstring
                  # ", [$src]"
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

//
// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
//
// More generally the asm string built below is
//   wmma.store.d.sync${ptx:aligned}.<layout>.<geom><space>.<type>
// with <geom> and <type> taken from Frag.
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
               [!con((ins DstOp:$dst),
                     Frag.Ins,
                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
    Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on pointer's address space.
  // To match the right intrinsic, we need to build AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but using
  // (ops node:$name, .....).
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = (outs);
  let AsmString = "wmma.store.d.sync"
                  # "${ptx:aligned}"
                  # "." # Layout
                  # "." # Frag.geom
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " \t[$dst],"
                  # Frag.regstring
                  # !if(WithStride, ", $ldm", "")
                  # ";";
}

// Create all load/store variants
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach layout = ["row", "col"] in {
    foreach stride = [false, true] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>;
          foreach frag = NVVM_MMA_OPS.all_st_ops in
            if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then
              def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>;
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// B1 instruction variants need extra constraints.
// `ret` is FragA's predicates, plus [hasSM80, hasPTX71] when b1op is
// ".and.popc".
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
  );
}
// WMMA.MMA
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // f16 inputs only spell out the D and C types; every other input type
  // spells out D, A, B and C.
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    true: "." # FragD.ptx_elt_type
          # "." # FragA.ptx_elt_type
          # "." # FragB.ptx_elt_type
          # "." # FragC.ptx_elt_type
  );
  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  # !if(!ne(rnd, ""), !strconcat(".", rnd), "")
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                              WMMA_REGINFO<op[1], "wmma.mma">,
                              WMMA_REGINFO<op[2], "wmma.mma">,
                              WMMA_REGINFO<op[3], "wmma.mma">,
                              layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset

// MMA
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
                        [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  let OutOperandList = FragD.Outs;
  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
  // Unlike WMMA_MMA, all four fragment types are always spelled out.
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;
  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset

//
// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
//
// More generally the asm string built below is
//   ldmatrix.sync.aligned.<geom>.<frag>[.trans]<space>.<type>
// with <geom>, <frag> and <type> taken from Frag.
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // Build PatFrag that only matches particular address space.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Build AS-constrained pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let OutOperandList = Frag.Outs;
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma>;