1//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def immFloat0 : PatLeaf<(fpimm), [{ 10 float f = (float)N->getValueAPF().convertToFloat(); 11 return (f==0.0f); 12}]>; 13 14def immFloat1 : PatLeaf<(fpimm), [{ 15 float f = (float)N->getValueAPF().convertToFloat(); 16 return (f==1.0f); 17}]>; 18 19def immDouble0 : PatLeaf<(fpimm), [{ 20 double d = (double)N->getValueAPF().convertToDouble(); 21 return (d==0.0); 22}]>; 23 24def immDouble1 : PatLeaf<(fpimm), [{ 25 double d = (double)N->getValueAPF().convertToDouble(); 26 return (d==1.0); 27}]>; 28 29def AS_match { 30 code generic = [{ 31 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC); 32 }]; 33 code shared = [{ 34 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED); 35 }]; 36 code global = [{ 37 return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL); 38 }]; 39} 40 41// A node that will be replaced with the current PTX version. 42class PTX { 43 SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{ 44 return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N)); 45 }]>; 46 // (i32 0) will be XForm'ed to the currently used PTX version. 47 dag version = (PTXVerXform (i32 0)); 48} 49def ptx : PTX; 50 51// Generates list of n sequential register names. 52// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ] 53class RegSeq<int n, string prefix> { 54 list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret, 55 [prefix # !add(n, -1)]), 56 []); 57} 58 59class THREADMASK_INFO<bit sync> { 60 list<bit> ret = !if(sync, [0,1], [0]); 61} 62 63//----------------------------------- 64// Synchronization and shuffle functions 65//----------------------------------- 66let isConvergent = 1 in { 67def INT_BARRIER0 : NVPTXInst<(outs), (ins), 68 "bar.sync \t0;", 69 [(int_nvvm_barrier0)]>; 70def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1), 71 "bar.sync \t$src1;", 72 [(int_nvvm_barrier_n Int32Regs:$src1)]>; 73def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2), 74 "bar.sync \t$src1, $src2;", 75 [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>; 76def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), 77 !strconcat("{{ \n\t", 78 ".reg .pred \t%p1; \n\t", 79 "setp.ne.u32 \t%p1, $pred, 0; \n\t", 80 "bar.red.popc.u32 \t$dst, 0, %p1; \n\t", 81 "}}"), 82 [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>; 83def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), 84 !strconcat("{{ \n\t", 85 ".reg .pred \t%p1; \n\t", 86 ".reg .pred \t%p2; \n\t", 87 "setp.ne.u32 \t%p1, $pred, 0; \n\t", 88 "bar.red.and.pred \t%p2, 0, %p1; \n\t", 89 "selp.u32 \t$dst, 1, 0, %p2; \n\t", 90 "}}"), 91 [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>; 92def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred), 93 !strconcat("{{ \n\t", 94 ".reg .pred \t%p1; \n\t", 95 ".reg .pred \t%p2; \n\t", 96 "setp.ne.u32 \t%p1, $pred, 0; \n\t", 97 "bar.red.or.pred \t%p2, 0, %p1; \n\t", 98 "selp.u32 \t$dst, 1, 0, %p2; \n\t", 99 "}}"), 100 [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>; 101 102def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;", 103 [(int_nvvm_bar_sync imm:$i)]>; 104 105def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;", 106 [(int_nvvm_bar_warp_sync imm:$i)]>, 107 Requires<[hasPTX60, hasSM30]>; 108def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;", 109 [(int_nvvm_bar_warp_sync Int32Regs:$i)]>, 110 Requires<[hasPTX60, hasSM30]>; 111 112def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;", 113 [(int_nvvm_barrier_sync imm:$i)]>, 114 Requires<[hasPTX60, hasSM30]>; 115def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;", 116 [(int_nvvm_barrier_sync Int32Regs:$i)]>, 117 Requires<[hasPTX60, hasSM30]>; 118 119def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt), 120 "barrier.sync \t$id, $cnt;", 121 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>, 122 Requires<[hasPTX60, hasSM30]>; 123def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt), 124 "barrier.sync \t$id, $cnt;", 125 [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>, 126 Requires<[hasPTX60, hasSM30]>; 127def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt), 128 "barrier.sync \t$id, $cnt;", 129 [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>, 130 Requires<[hasPTX60, hasSM30]>; 131def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt), 132 "barrier.sync \t$id, $cnt;", 133 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>, 134 Requires<[hasPTX60, hasSM30]>; 135 136class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred, 137 bit offset_imm, bit mask_imm, bit threadmask_imm> 138 : NVPTXInst<(outs), (ins), "?", []> { 139 NVPTXRegClass rc = !cond( 140 !eq(reg, "i32"): Int32Regs, 141 !eq(reg, "f32"): Float32Regs); 142 string IntrName = "int_nvvm_shfl_" 143 # !if(sync, "sync_", "") 144 # mode 145 # "_" # reg 146 # !if(return_pred, "p", ""); 147 Intrinsic Intr = !cast<Intrinsic>(IntrName); 148 let InOperandList = !con( 149 !if(sync, 150 !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]), 151 (ins)), 152 (ins rc:$src), 153 !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]), 154 !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"]) 155 ); 156 let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst)); 157 let AsmString = "shfl." 158 # !if(sync, "sync.", "") 159 # mode # ".b32\t" 160 # "$dst" 161 # !if(return_pred, "|$pred", "") # ", " 162 # "$src, $offset, $mask" 163 # !if(sync, ", $threadmask", "") 164 # ";" 165 ; 166 let Pattern = [!con( 167 !foreach(tmp, OutOperandList, 168 !subst(outs, set, 169 !subst(i32imm, imm, tmp))), 170 (set !foreach(tmp, InOperandList, 171 !subst(ins, Intr, 172 !subst(i32imm, imm, tmp)))) 173 )]; 174} 175 176foreach sync = [0, 1] in { 177 foreach mode = ["up", "down", "bfly", "idx"] in { 178 foreach regclass = ["i32", "f32"] in { 179 foreach return_pred = [0, 1] in { 180 foreach offset_imm = [0, 1] in { 181 foreach mask_imm = [0, 1] in { 182 foreach threadmask_imm = THREADMASK_INFO<sync>.ret in { 183 def : SHFL_INSTR<sync, mode, regclass, return_pred, 184 offset_imm, mask_imm, threadmask_imm>, 185 Requires<!if(sync, [hasSM30], [hasSM30, hasSHFL])>; 186 } 187 } 188 } 189 } 190 } 191 } 192} 193 194// vote.{all,any,uni,ballot} 195multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { 196 def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred), 197 "vote." # mode # " \t$dest, $pred;", 198 [(set regclass:$dest, (IntOp Int1Regs:$pred))]>, 199 Requires<[hasPTX60, hasSM30]>; 200} 201 202defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>; 203defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>; 204defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>; 205defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>; 206 207// vote.sync.{all,any,uni,ballot} 208multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> { 209 def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred), 210 "vote.sync." # mode # " \t$dest, $pred, $mask;", 211 [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>, 212 Requires<[hasPTX60, hasSM30]>; 213 def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred), 214 "vote.sync." # mode #" \t$dest, $pred, $mask;", 215 [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>, 216 Requires<[hasPTX60, hasSM30]>; 217} 218 219defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>; 220defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>; 221defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>; 222defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>; 223 224multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp, 225 Operand ImmOp> { 226 def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value), 227 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", 228 [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>, 229 Requires<[hasPTX60, hasSM70]>; 230 def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value), 231 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", 232 [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, 233 Requires<[hasPTX60, hasSM70]>; 234 def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value), 235 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", 236 [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>, 237 Requires<[hasPTX60, hasSM70]>; 238 def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value), 239 "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", 240 [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, 241 Requires<[hasPTX60, hasSM70]>; 242} 243 244defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32, 245 i32imm>; 246defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64, 247 i64imm>; 248 249multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp, 250 Operand ImmOp> { 251 def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), 252 (ins i32imm:$mask, ImmOp:$value), 253 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", 254 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, 255 Requires<[hasPTX60, hasSM70]>; 256 def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), 257 (ins Int32Regs:$mask, ImmOp:$value), 258 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", 259 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, 260 Requires<[hasPTX60, hasSM70]>; 261 def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), 262 (ins i32imm:$mask, regclass:$value), 263 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", 264 [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, 265 Requires<[hasPTX60, hasSM70]>; 266 def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), 267 (ins Int32Regs:$mask, regclass:$value), 268 "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", 269 [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, 270 Requires<[hasPTX60, hasSM70]>; 271} 272defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p, 273 i32imm>; 274defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p, 275 i64imm>; 276 277} // isConvergent = 1 278 279//----------------------------------- 280// Explicit Memory Fence Functions 281//----------------------------------- 282class MEMBAR<string StrOp, Intrinsic IntOP> : 283 NVPTXInst<(outs), (ins), 284 StrOp, [(IntOP)]>; 285 286def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>; 287def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>; 288def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>; 289 290 291//----------------------------------- 292// Math Functions 293//----------------------------------- 294 295// Map min(1.0, max(0.0, x)) to sat(x) 296// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x is 297// NaN 298// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0. 299// Same story for fmax, fmin. 300 301def : Pat<(int_nvvm_fmin_f immFloat1, 302 (int_nvvm_fmax_f immFloat0, Float32Regs:$a)), 303 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 304def : Pat<(int_nvvm_fmin_f immFloat1, 305 (int_nvvm_fmax_f Float32Regs:$a, immFloat0)), 306 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 307def : Pat<(int_nvvm_fmin_f 308 (int_nvvm_fmax_f immFloat0, Float32Regs:$a), immFloat1), 309 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 310def : Pat<(int_nvvm_fmin_f 311 (int_nvvm_fmax_f Float32Regs:$a, immFloat0), immFloat1), 312 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 313 314def : Pat<(int_nvvm_fmin_d immDouble1, 315 (int_nvvm_fmax_d immDouble0, Float64Regs:$a)), 316 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 317def : Pat<(int_nvvm_fmin_d immDouble1, 318 (int_nvvm_fmax_d Float64Regs:$a, immDouble0)), 319 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 320def : Pat<(int_nvvm_fmin_d 321 (int_nvvm_fmax_d immDouble0, Float64Regs:$a), immDouble1), 322 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 323def : Pat<(int_nvvm_fmin_d 324 (int_nvvm_fmax_d Float64Regs:$a, immDouble0), immDouble1), 325 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 326 327 328// We need a full string for OpcStr here because we need to deal with case like 329// INT_PTX_RECIP. 330class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass, 331 NVPTXRegClass src_regclass, Intrinsic IntOP> 332 : NVPTXInst<(outs target_regclass:$dst), (ins src_regclass:$src0), 333 OpcStr, 334 [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>; 335 336// We need a full string for OpcStr here because we need to deal with the case 337// like INT_PTX_NATIVE_POWR_F. 338class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass, 339 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, Intrinsic IntOP> 340 : NVPTXInst<(outs t_regclass:$dst), 341 (ins s0_regclass:$src0, s1_regclass:$src1), 342 OpcStr, 343 [(set t_regclass:$dst, (IntOP s0_regclass:$src0, s1_regclass:$src1))]>; 344 345class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass, 346 NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass, 347 NVPTXRegClass s2_regclass, Intrinsic IntOP> 348 : NVPTXInst<(outs t_regclass:$dst), 349 (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2), 350 OpcStr, 351 [(set t_regclass:$dst, 352 (IntOP s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2))]>; 353 354// 355// MISC 356// 357 358def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs, 359 Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>; 360 361// 362// Min Max 363// 364 365def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;", Float32Regs, 366 Float32Regs, Float32Regs, int_nvvm_fmin_f>; 367def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;", 368 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>; 369 370def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs, 371 Float32Regs, Float32Regs, int_nvvm_fmax_f>; 372def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;", 373 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>; 374 375def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs, 376 Float64Regs, Float64Regs, int_nvvm_fmin_d>; 377def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs, 378 Float64Regs, Float64Regs, int_nvvm_fmax_d>; 379 380 381// 382// Multiplication 383// 384 385def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;", Int32Regs, 386 Int32Regs, Int32Regs, int_nvvm_mulhi_i>; 387def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;", Int32Regs, 388 Int32Regs, Int32Regs, int_nvvm_mulhi_ui>; 389 390def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;", Int64Regs, 391 Int64Regs, Int64Regs, int_nvvm_mulhi_ll>; 392def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;", Int64Regs, 393 Int64Regs, Int64Regs, int_nvvm_mulhi_ull>; 394 395def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;", 396 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>; 397def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;", 398 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>; 399def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;", 400 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>; 401def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;", 402 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>; 403def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;", 404 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>; 405def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;", 406 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>; 407def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;", 408 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>; 409def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;", 410 Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>; 411 412def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;", 413 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>; 414def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;", 415 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>; 416def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;", 417 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>; 418def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;", 419 Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>; 420 421def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;", 422 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>; 423def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;", 424 Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>; 425 426// 427// Div 428// 429 430def INT_NVVM_DIV_APPROX_FTZ_F 431 : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;", Float32Regs, 432 Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>; 433def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;", 434 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>; 435 436def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;", 437 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>; 438def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;", 439 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>; 440def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;", 441 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>; 442def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;", 443 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>; 444def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;", 445 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>; 446def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;", 447 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>; 448def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;", 449 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>; 450def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;", 451 Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>; 452 453def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;", 454 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>; 455def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;", 456 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>; 457def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;", 458 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>; 459def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;", 460 Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>; 461 462// 463// Sad 464// 465 466def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;", 467 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>; 468def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;", 469 Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>; 470 471// 472// Floor Ceil 473// 474 475def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a), 476 (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 477def : Pat<(int_nvvm_floor_f Float32Regs:$a), 478 (CVT_f32_f32 Float32Regs:$a, CvtRMI)>; 479def : Pat<(int_nvvm_floor_d Float64Regs:$a), 480 (CVT_f64_f64 Float64Regs:$a, CvtRMI)>; 481 482def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a), 483 (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 484def : Pat<(int_nvvm_ceil_f Float32Regs:$a), 485 (CVT_f32_f32 Float32Regs:$a, CvtRPI)>; 486def : Pat<(int_nvvm_ceil_d Float64Regs:$a), 487 (CVT_f64_f64 Float64Regs:$a, CvtRPI)>; 488 489// 490// Abs 491// 492 493def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;", Float32Regs, 494 Float32Regs, int_nvvm_fabs_ftz_f>; 495def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, 496 Float32Regs, int_nvvm_fabs_f>; 497 498def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, 499 Float64Regs, int_nvvm_fabs_d>; 500 501// 502// Round 503// 504 505def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a), 506 (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 507def : Pat<(int_nvvm_round_f Float32Regs:$a), 508 (CVT_f32_f32 Float32Regs:$a, CvtRNI)>; 509def : Pat<(int_nvvm_round_d Float64Regs:$a), 510 (CVT_f64_f64 Float64Regs:$a, CvtRNI)>; 511 512// 513// Trunc 514// 515 516def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a), 517 (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 518def : Pat<(int_nvvm_trunc_f Float32Regs:$a), 519 (CVT_f32_f32 Float32Regs:$a, CvtRZI)>; 520def : Pat<(int_nvvm_trunc_d Float64Regs:$a), 521 (CVT_f64_f64 Float64Regs:$a, CvtRZI)>; 522 523// 524// Saturate 525// 526 527def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a), 528 (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>; 529def : Pat<(int_nvvm_saturate_f Float32Regs:$a), 530 (CVT_f32_f32 Float32Regs:$a, CvtSAT)>; 531def : Pat<(int_nvvm_saturate_d Float64Regs:$a), 532 (CVT_f64_f64 Float64Regs:$a, CvtSAT)>; 533 534// 535// Exp2 Log2 536// 537 538def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;", 539 Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>; 540def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;", 541 Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>; 542def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", 543 Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; 544 545def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", 546 Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; 547def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;", 548 Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>; 549def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;", 550 Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>; 551 552// 553// Sin Cos 554// 555 556def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;", 557 Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>; 558def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;", 559 Float32Regs, Float32Regs, int_nvvm_sin_approx_f>; 560 561def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;", 562 Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>; 563def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;", 564 Float32Regs, Float32Regs, int_nvvm_cos_approx_f>; 565 566// 567// Fma 568// 569 570def INT_NVVM_FMA_RN_FTZ_F 571 : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 572 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>; 573def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;", 574 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>; 575def INT_NVVM_FMA_RZ_FTZ_F 576 : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 577 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>; 578def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;", 579 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>; 580def INT_NVVM_FMA_RM_FTZ_F 581 : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 582 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>; 583def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;", 584 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>; 585def INT_NVVM_FMA_RP_FTZ_F 586 : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;", Float32Regs, 587 Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>; 588def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;", 589 Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>; 590 591def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;", 592 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>; 593def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;", 594 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>; 595def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;", 596 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>; 597def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;", 598 Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>; 599 600// 601// Rcp 602// 603 604def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;", 605 Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>; 606def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;", 607 Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>; 608def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;", 609 Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>; 610def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;", 611 Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>; 612def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;", 613 Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>; 614def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;", 615 Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>; 616def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;", 617 Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>; 618def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;", 619 Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>; 620 621def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;", Float64Regs, 622 Float64Regs, int_nvvm_rcp_rn_d>; 623def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;", Float64Regs, 624 Float64Regs, int_nvvm_rcp_rz_d>; 625def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;", Float64Regs, 626 Float64Regs, int_nvvm_rcp_rm_d>; 627def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;", Float64Regs, 628 Float64Regs, int_nvvm_rcp_rp_d>; 629 630def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;", 631 Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>; 632 633// 634// Sqrt 635// 636 637def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;", 638 Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>; 639def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;", Float32Regs, 640 Float32Regs, int_nvvm_sqrt_rn_f>; 641def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;", 642 Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>; 643def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;", Float32Regs, 644 Float32Regs, int_nvvm_sqrt_rz_f>; 645def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;", 646 Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>; 647def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;", Float32Regs, 648 Float32Regs, int_nvvm_sqrt_rm_f>; 649def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;", 650 Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>; 651def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;", Float32Regs, 652 Float32Regs, int_nvvm_sqrt_rp_f>; 653def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;", 654 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>; 655def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;", 656 Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>; 657 658def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;", Float64Regs, 659 Float64Regs, int_nvvm_sqrt_rn_d>; 660def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;", Float64Regs, 661 Float64Regs, int_nvvm_sqrt_rz_d>; 662def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;", Float64Regs, 663 Float64Regs, int_nvvm_sqrt_rm_d>; 664def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;", Float64Regs, 665 Float64Regs, int_nvvm_sqrt_rp_d>; 666 667// nvvm_sqrt intrinsic 668def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 669 (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ, do_SQRTF32_RN]>; 670def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 671 (INT_NVVM_SQRT_RN_F Float32Regs:$a)>, Requires<[do_SQRTF32_RN]>; 672def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 673 (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>, Requires<[doF32FTZ]>; 674def : Pat<(int_nvvm_sqrt_f Float32Regs:$a), 675 (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>; 676 677// 678// Rsqrt 679// 680 681def INT_NVVM_RSQRT_APPROX_FTZ_F 682 : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, 683 int_nvvm_rsqrt_approx_ftz_f>; 684def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;", 685 Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>; 686def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;", 687 Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>; 688 689// 690// Add 691// 692 693def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;", 694 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>; 695def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;", 696 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>; 697def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;", 698 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>; 699def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;", 700 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>; 701def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;", 702 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>; 703def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", 704 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; 705def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", 706 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; 707def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", 708 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; 709 710def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", 711 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; 712def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", 713 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; 714def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", 715 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; 716def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", 717 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; 718 719// 720// Convert 721// 722 723def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a), 724 (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>; 725def : Pat<(int_nvvm_d2f_rn Float64Regs:$a), 726 (CVT_f32_f64 Float64Regs:$a, CvtRN)>; 727def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a), 728 (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>; 729def : Pat<(int_nvvm_d2f_rz Float64Regs:$a), 730 (CVT_f32_f64 Float64Regs:$a, CvtRZ)>; 731def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a), 732 (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>; 733def : Pat<(int_nvvm_d2f_rm Float64Regs:$a), 734 (CVT_f32_f64 Float64Regs:$a, CvtRM)>; 735def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a), 736 (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>; 737def : Pat<(int_nvvm_d2f_rp Float64Regs:$a), 738 (CVT_f32_f64 Float64Regs:$a, CvtRP)>; 739 740def : Pat<(int_nvvm_d2i_rn Float64Regs:$a), 741 (CVT_s32_f64 Float64Regs:$a, CvtRNI)>; 742def : Pat<(int_nvvm_d2i_rz Float64Regs:$a), 743 (CVT_s32_f64 Float64Regs:$a, CvtRZI)>; 744def : Pat<(int_nvvm_d2i_rm Float64Regs:$a), 745 (CVT_s32_f64 Float64Regs:$a, CvtRMI)>; 746def : Pat<(int_nvvm_d2i_rp Float64Regs:$a), 747 (CVT_s32_f64 Float64Regs:$a, CvtRPI)>; 748 749def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a), 750 (CVT_u32_f64 Float64Regs:$a, CvtRNI)>; 751def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a), 752 (CVT_u32_f64 Float64Regs:$a, CvtRZI)>; 753def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a), 754 (CVT_u32_f64 Float64Regs:$a, CvtRMI)>; 755def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a), 756 (CVT_u32_f64 Float64Regs:$a, CvtRPI)>; 757 758def : Pat<(int_nvvm_i2d_rn Int32Regs:$a), 759 (CVT_f64_s32 Int32Regs:$a, CvtRN)>; 760def : Pat<(int_nvvm_i2d_rz Int32Regs:$a), 761 (CVT_f64_s32 Int32Regs:$a, CvtRZ)>; 762def : Pat<(int_nvvm_i2d_rm Int32Regs:$a), 763 (CVT_f64_s32 Int32Regs:$a, CvtRM)>; 764def : Pat<(int_nvvm_i2d_rp Int32Regs:$a), 765 (CVT_f64_s32 Int32Regs:$a, CvtRP)>; 766 767def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a), 768 (CVT_f64_u32 Int32Regs:$a, CvtRN)>; 769def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a), 770 (CVT_f64_u32 Int32Regs:$a, CvtRZ)>; 771def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a), 772 (CVT_f64_u32 Int32Regs:$a, CvtRM)>; 773def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a), 774 (CVT_f64_u32 Int32Regs:$a, CvtRP)>; 775 776def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a), 777 (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 778def : Pat<(int_nvvm_f2i_rn Float32Regs:$a), 779 (CVT_s32_f32 Float32Regs:$a, CvtRNI)>; 780def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a), 781 (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 782def : Pat<(int_nvvm_f2i_rz Float32Regs:$a), 783 (CVT_s32_f32 Float32Regs:$a, CvtRZI)>; 784def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a), 785 (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 786def : Pat<(int_nvvm_f2i_rm Float32Regs:$a), 787 (CVT_s32_f32 Float32Regs:$a, CvtRMI)>; 788def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a), 789 (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 790def : Pat<(int_nvvm_f2i_rp Float32Regs:$a), 791 (CVT_s32_f32 Float32Regs:$a, CvtRPI)>; 792 793def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a), 794 (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>; 795def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a), 796 (CVT_u32_f32 Float32Regs:$a, CvtRNI)>; 797def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a), 798 (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>; 799def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a), 800 (CVT_u32_f32 Float32Regs:$a, CvtRZI)>; 801def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a), 802 (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>; 803def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a), 804 (CVT_u32_f32 Float32Regs:$a, CvtRMI)>; 805def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a), 806 (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>; 807def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a), 808 (CVT_u32_f32 Float32Regs:$a, CvtRPI)>; 809 810def : Pat<(int_nvvm_i2f_rn Int32Regs:$a), 811 (CVT_f32_s32 Int32Regs:$a, CvtRN)>; 812def : Pat<(int_nvvm_i2f_rz Int32Regs:$a), 813 (CVT_f32_s32 Int32Regs:$a, CvtRZ)>; 814def : Pat<(int_nvvm_i2f_rm Int32Regs:$a), 815 (CVT_f32_s32 Int32Regs:$a, CvtRM)>; 816def : Pat<(int_nvvm_i2f_rp Int32Regs:$a), 817 (CVT_f32_s32 Int32Regs:$a, CvtRP)>; 818 819def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a), 820 (CVT_f32_u32 Int32Regs:$a, CvtRN)>; 821def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a), 822 (CVT_f32_u32 Int32Regs:$a, CvtRZ)>; 823def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a), 824 (CVT_f32_u32 Int32Regs:$a, CvtRM)>; 825def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a), 826 (CVT_f32_u32 Int32Regs:$a, CvtRP)>; 827 828def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};", 829 Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>; 830 831def INT_NVVM_D2I_LO : F_MATH_1< 832 !strconcat("{{\n\t", 833 ".reg .b32 %temp; \n\t", 834 "mov.b64 \t{$dst, %temp}, $src0;\n\t", 835 "}}"), 836 Int32Regs, Float64Regs, int_nvvm_d2i_lo>; 837def INT_NVVM_D2I_HI : F_MATH_1< 838 !strconcat("{{\n\t", 839 ".reg .b32 %temp; \n\t", 840 "mov.b64 \t{%temp, $dst}, $src0;\n\t", 841 "}}"), 842 Int32Regs, Float64Regs, int_nvvm_d2i_hi>; 843 844def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a), 845 (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 846def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a), 847 (CVT_s64_f32 Float32Regs:$a, CvtRNI)>; 848def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a), 849 (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 850def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a), 851 (CVT_s64_f32 Float32Regs:$a, CvtRZI)>; 852def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a), 853 (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 854def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a), 855 (CVT_s64_f32 Float32Regs:$a, CvtRMI)>; 856def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a), 857 (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 858def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a), 859 (CVT_s64_f32 Float32Regs:$a, CvtRPI)>; 860 861def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a), 862 (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>; 863def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a), 864 (CVT_u64_f32 Float32Regs:$a, CvtRNI)>; 865def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a), 866 (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>; 867def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a), 868 (CVT_u64_f32 Float32Regs:$a, CvtRZI)>; 869def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a), 870 (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>; 871def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a), 872 (CVT_u64_f32 Float32Regs:$a, CvtRMI)>; 873def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a), 874 (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>; 875def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a), 876 (CVT_u64_f32 Float32Regs:$a, CvtRPI)>; 877 878def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a), 879 (CVT_s64_f64 Float64Regs:$a, CvtRNI)>; 880def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a), 881 (CVT_s64_f64 Float64Regs:$a, CvtRZI)>; 882def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a), 883 (CVT_s64_f64 Float64Regs:$a, CvtRMI)>; 884def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a), 885 (CVT_s64_f64 Float64Regs:$a, CvtRPI)>; 886 887def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a), 888 (CVT_u64_f64 Float64Regs:$a, CvtRNI)>; 889def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a), 890 (CVT_u64_f64 Float64Regs:$a, CvtRZI)>; 891def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a), 892 (CVT_u64_f64 Float64Regs:$a, CvtRMI)>; 893def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a), 894 (CVT_u64_f64 Float64Regs:$a, CvtRPI)>; 895 896def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a), 897 (CVT_f32_s64 Int64Regs:$a, CvtRN)>; 898def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a), 899 (CVT_f32_s64 Int64Regs:$a, CvtRZ)>; 900def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a), 901 (CVT_f32_s64 Int64Regs:$a, CvtRM)>; 902def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a), 903 (CVT_f32_s64 Int64Regs:$a, CvtRP)>; 904 905def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a), 906 (CVT_f32_u64 Int64Regs:$a, CvtRN)>; 907def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a), 908 (CVT_f32_u64 Int64Regs:$a, CvtRZ)>; 909def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a), 910 (CVT_f32_u64 Int64Regs:$a, CvtRM)>; 911def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a), 912 (CVT_f32_u64 Int64Regs:$a, CvtRP)>; 913 914def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a), 915 (CVT_f64_s64 Int64Regs:$a, CvtRN)>; 916def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a), 917 (CVT_f64_s64 Int64Regs:$a, CvtRZ)>; 918def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a), 919 (CVT_f64_s64 Int64Regs:$a, CvtRM)>; 920def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a), 921 (CVT_f64_s64 Int64Regs:$a, CvtRP)>; 922 923def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a), 924 (CVT_f64_u64 Int64Regs:$a, CvtRN)>; 925def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a), 926 (CVT_f64_u64 Int64Regs:$a, CvtRZ)>; 927def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a), 928 (CVT_f64_u64 Int64Regs:$a, CvtRM)>; 929def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a), 930 (CVT_f64_u64 Int64Regs:$a, CvtRP)>; 931 932 933def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), 934 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>; 935def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), 936 (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>; 937 938// 939// Bitcast 940// 941 942def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;", Int32Regs, 943 Float32Regs, int_nvvm_bitcast_f2i>; 944def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;", Float32Regs, 945 Int32Regs, int_nvvm_bitcast_i2f>; 946 947def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;", Float64Regs, 948 Int64Regs, int_nvvm_bitcast_ll2d>; 949def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;", Int64Regs, 950 Float64Regs, int_nvvm_bitcast_d2ll>; 951 952// 953// FNS 954// 955 956class INT_FNS_MBO<dag ins, dag Operands> 957 : NVPTXInst<(outs Int32Regs:$dst), ins, 958 "fns.b32 \t$dst, $mask, $base, $offset;", 959 [(set Int32Regs:$dst, Operands )]>, 960 Requires<[hasPTX60, hasSM30]>; 961 962def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset), 963 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>; 964def INT_FNS_rri : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset), 965 (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>; 966def INT_FNS_rir : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset), 967 (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>; 968def INT_FNS_rii : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset), 969 (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>; 970def INT_FNS_irr : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset), 971 (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>; 972def INT_FNS_iri : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset), 973 (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>; 974def INT_FNS_iir : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset), 975 (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>; 976def INT_FNS_iii : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset), 977 (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>; 978 979//----------------------------------- 980// Atomic Functions 981//----------------------------------- 982 983class ATOMIC_GLOBAL_CHK <dag ops, dag frag> 984 : PatFrag<ops, frag, AS_match.global>; 985class ATOMIC_SHARED_CHK <dag ops, dag frag> 986 : PatFrag<ops, frag, AS_match.shared>; 987class ATOMIC_GENERIC_CHK <dag ops, dag frag> 988 : PatFrag<ops, frag, AS_match.generic>; 989 990multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 991 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 992 Operand IMMType, SDNode IMM, list<Predicate> Pred> { 993 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 994 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"), 995 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 996 Requires<Pred>; 997 def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b), 998 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""), 999 [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>, 1000 Requires<Pred>; 1001} 1002multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 1003 string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM, 1004 list<Predicate> Pred = []> { 1005 defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 1006 IntOp, IMMType, IMM, Pred>; 1007 defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1008 IntOp, IMMType, IMM, Pred>; 1009} 1010 1011// has 2 operands, neg the second one 1012multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 1013 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 1014 Operand IMMType, list<Predicate> Pred> { 1015 def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b), 1016 !strconcat( 1017 "{{ \n\t", 1018 ".reg \t.s", TypeStr, " temp; \n\t", 1019 "neg.s", TypeStr, " \ttemp, $b; \n\t", 1020 "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t", 1021 "}}"), 1022 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>, 1023 Requires<Pred>; 1024} 1025multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr, 1026 string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType, 1027 list<Predicate> Pred = []> { 1028 defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 1029 IntOp, IMMType, Pred> ; 1030 defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1031 IntOp, IMMType, Pred> ; 1032} 1033 1034// has 3 operands 1035multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass, 1036 string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp, 1037 Operand IMMType, list<Predicate> Pred> { 1038 def reg : NVPTXInst<(outs regclass:$dst), 1039 (ins ptrclass:$addr, regclass:$b, regclass:$c), 1040 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1041 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>, 1042 Requires<Pred>; 1043 1044 def imm1 : NVPTXInst<(outs regclass:$dst), 1045 (ins ptrclass:$addr, IMMType:$b, regclass:$c), 1046 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1047 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>, 1048 Requires<Pred>; 1049 1050 def imm2 : NVPTXInst<(outs regclass:$dst), 1051 (ins ptrclass:$addr, regclass:$b, IMMType:$c), 1052 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""), 1053 [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>, 1054 Requires<Pred>; 1055 1056 def imm3 : NVPTXInst<(outs regclass:$dst), 1057 (ins ptrclass:$addr, IMMType:$b, IMMType:$c), 1058 !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"), 1059 [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>, 1060 Requires<Pred>; 1061} 1062multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr, 1063 string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> { 1064 defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr, 1065 IntOp, IMMType, Pred>; 1066 defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr, 1067 IntOp, IMMType, Pred>; 1068} 1069 1070// atom_add 1071 1072def atomic_load_add_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1073 (atomic_load_add_32 node:$a, node:$b)>; 1074def atomic_load_add_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1075 (atomic_load_add_32 node:$a, node:$b)>; 1076def atomic_load_add_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1077 (atomic_load_add_32 node:$a, node:$b)>; 1078def atomic_load_add_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1079 (atomic_load_add_64 node:$a, node:$b)>; 1080def atomic_load_add_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1081 (atomic_load_add_64 node:$a, node:$b)>; 1082def atomic_load_add_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1083 (atomic_load_add_64 node:$a, node:$b)>; 1084def atomic_load_add_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1085 (atomic_load_fadd node:$a, node:$b)>; 1086def atomic_load_add_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1087 (atomic_load_fadd node:$a, node:$b)>; 1088def atomic_load_add_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1089 (atomic_load_fadd node:$a, node:$b)>; 1090 1091defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add", 1092 atomic_load_add_32_g, i32imm, imm>; 1093defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add", 1094 atomic_load_add_32_s, i32imm, imm>; 1095defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add", 1096 atomic_load_add_32_gen, i32imm, imm>; 1097defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1098 ".add", atomic_load_add_32_gen, i32imm, imm>; 1099 1100defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add", 1101 atomic_load_add_64_g, i64imm, imm>; 1102defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add", 1103 atomic_load_add_64_s, i64imm, imm>; 1104defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add", 1105 atomic_load_add_64_gen, i64imm, imm>; 1106defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1107 ".add", atomic_load_add_64_gen, i64imm, imm>; 1108 1109defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add", 1110 atomic_load_add_g, f32imm, fpimm>; 1111defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add", 1112 atomic_load_add_s, f32imm, fpimm>; 1113defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add", 1114 atomic_load_add_gen, f32imm, fpimm>; 1115 1116defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add", 1117 atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>; 1118defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add", 1119 atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>; 1120defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add", 1121 atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>; 1122 1123// atom_sub 1124 1125def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1126 (atomic_load_sub_32 node:$a, node:$b)>; 1127def atomic_load_sub_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1128 (atomic_load_sub_32 node:$a, node:$b)>; 1129def atomic_load_sub_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1130 (atomic_load_sub_32 node:$a, node:$b)>; 1131def atomic_load_sub_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1132 (atomic_load_sub_64 node:$a, node:$b)>; 1133def atomic_load_sub_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1134 (atomic_load_sub_64 node:$a, node:$b)>; 1135def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1136 (atomic_load_sub_64 node:$a, node:$b)>; 1137 1138defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add", 1139 atomic_load_sub_32_g, i32imm>; 1140defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add", 1141 atomic_load_sub_64_g, i64imm>; 1142defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add", 1143 atomic_load_sub_32_gen, i32imm>; 1144defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", 1145 ".add", atomic_load_sub_32_gen, i32imm>; 1146defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add", 1147 atomic_load_sub_32_s, i32imm>; 1148defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add", 1149 atomic_load_sub_64_s, i64imm>; 1150defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add", 1151 atomic_load_sub_64_gen, i64imm>; 1152defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", 1153 ".add", atomic_load_sub_64_gen, i64imm>; 1154 1155// atom_swap 1156 1157def atomic_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1158 (atomic_swap_32 node:$a, node:$b)>; 1159def atomic_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1160 (atomic_swap_32 node:$a, node:$b)>; 1161def atomic_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1162 (atomic_swap_32 node:$a, node:$b)>; 1163def atomic_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1164 (atomic_swap_64 node:$a, node:$b)>; 1165def atomic_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1166 (atomic_swap_64 node:$a, node:$b)>; 1167def atomic_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1168 (atomic_swap_64 node:$a, node:$b)>; 1169 1170defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch", 1171 atomic_swap_32_g, i32imm, imm>; 1172defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch", 1173 atomic_swap_32_s, i32imm, imm>; 1174defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch", 1175 atomic_swap_32_gen, i32imm, imm>; 1176defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1177 ".exch", atomic_swap_32_gen, i32imm, imm>; 1178defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch", 1179 atomic_swap_64_g, i64imm, imm>; 1180defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch", 1181 atomic_swap_64_s, i64imm, imm>; 1182defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch", 1183 atomic_swap_64_gen, i64imm, imm>; 1184defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1185 ".exch", atomic_swap_64_gen, i64imm, imm>; 1186 1187// atom_max 1188 1189def atomic_load_max_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) 1190 , (atomic_load_max_32 node:$a, node:$b)>; 1191def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1192 (atomic_load_max_32 node:$a, node:$b)>; 1193def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1194 (atomic_load_max_32 node:$a, node:$b)>; 1195def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b) 1196 , (atomic_load_max_64 node:$a, node:$b)>; 1197def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1198 (atomic_load_max_64 node:$a, node:$b)>; 1199def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1200 (atomic_load_max_64 node:$a, node:$b)>; 1201def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1202 (atomic_load_umax_32 node:$a, node:$b)>; 1203def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1204 (atomic_load_umax_32 node:$a, node:$b)>; 1205def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1206 (atomic_load_umax_32 node:$a, node:$b)>; 1207def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1208 (atomic_load_umax_64 node:$a, node:$b)>; 1209def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1210 (atomic_load_umax_64 node:$a, node:$b)>; 1211def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1212 (atomic_load_umax_64 node:$a, node:$b)>; 1213 1214defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", 1215 ".max", atomic_load_max_32_g, i32imm, imm>; 1216defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", 1217 ".max", atomic_load_max_32_s, i32imm, imm>; 1218defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max", 1219 atomic_load_max_32_gen, i32imm, imm>; 1220defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1221 ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>; 1222defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", 1223 ".max", atomic_load_max_64_g, i64imm, imm>; 1224defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", 1225 ".max", atomic_load_max_64_s, i64imm, imm>; 1226defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max", 1227 atomic_load_max_64_gen, i64imm, imm>; 1228defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1229 ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>; 1230defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1231 ".max", atomic_load_umax_32_g, i32imm, imm>; 1232defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", 1233 ".max", atomic_load_umax_32_s, i32imm, imm>; 1234defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max", 1235 atomic_load_umax_32_gen, i32imm, imm>; 1236defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1237 ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>; 1238defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1239 ".max", atomic_load_umax_64_g, i64imm, imm>; 1240defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", 1241 ".max", atomic_load_umax_64_s, i64imm, imm>; 1242defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max", 1243 atomic_load_umax_64_gen, i64imm, imm>; 1244defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1245 ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>; 1246 1247// atom_min 1248 1249def atomic_load_min_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1250 (atomic_load_min_32 node:$a, node:$b)>; 1251def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1252 (atomic_load_min_32 node:$a, node:$b)>; 1253def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1254 (atomic_load_min_32 node:$a, node:$b)>; 1255def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1256 (atomic_load_min_64 node:$a, node:$b)>; 1257def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1258 (atomic_load_min_64 node:$a, node:$b)>; 1259def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1260 (atomic_load_min_64 node:$a, node:$b)>; 1261def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1262 (atomic_load_umin_32 node:$a, node:$b)>; 1263def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1264 (atomic_load_umin_32 node:$a, node:$b)>; 1265def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1266 (atomic_load_umin_32 node:$a, node:$b)>; 1267def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1268 (atomic_load_umin_64 node:$a, node:$b)>; 1269def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1270 (atomic_load_umin_64 node:$a, node:$b)>; 1271def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1272 (atomic_load_umin_64 node:$a, node:$b)>; 1273 1274defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", 1275 ".min", atomic_load_min_32_g, i32imm, imm>; 1276defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32", 1277 ".min", atomic_load_min_32_s, i32imm, imm>; 1278defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min", 1279 atomic_load_min_32_gen, i32imm, imm>; 1280defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1281 ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>; 1282defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", 1283 ".min", atomic_load_min_64_g, i64imm, imm>; 1284defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", 1285 ".min", atomic_load_min_64_s, i64imm, imm>; 1286defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min", 1287 atomic_load_min_64_gen, i64imm, imm>; 1288defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1289 ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>; 1290defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1291 ".min", atomic_load_umin_32_g, i32imm, imm>; 1292defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", 1293 ".min", atomic_load_umin_32_s, i32imm, imm>; 1294defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min", 1295 atomic_load_umin_32_gen, i32imm, imm>; 1296defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", 1297 ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>; 1298defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", 1299 ".min", atomic_load_umin_64_g, i64imm, imm>; 1300defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", 1301 ".min", atomic_load_umin_64_s, i64imm, imm>; 1302defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min", 1303 atomic_load_umin_64_gen, i64imm, imm>; 1304defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", 1305 ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>; 1306 1307// atom_inc atom_dec 1308 1309def atomic_load_inc_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1310 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1311def atomic_load_inc_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1312 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1313def atomic_load_inc_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1314 (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>; 1315def atomic_load_dec_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1316 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1317def atomic_load_dec_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1318 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1319def atomic_load_dec_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1320 (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>; 1321 1322defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc", 1323 atomic_load_inc_32_g, i32imm, imm>; 1324defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc", 1325 atomic_load_inc_32_s, i32imm, imm>; 1326defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc", 1327 atomic_load_inc_32_gen, i32imm, imm>; 1328defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1329 ".inc", atomic_load_inc_32_gen, i32imm, imm>; 1330defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec", 1331 atomic_load_dec_32_g, i32imm, imm>; 1332defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec", 1333 atomic_load_dec_32_s, i32imm, imm>; 1334defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec", 1335 atomic_load_dec_32_gen, i32imm, imm>; 1336defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", 1337 ".dec", atomic_load_dec_32_gen, i32imm, imm>; 1338 1339// atom_and 1340 1341def atomic_load_and_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1342 (atomic_load_and_32 node:$a, node:$b)>; 1343def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1344 (atomic_load_and_32 node:$a, node:$b)>; 1345def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1346 (atomic_load_and_32 node:$a, node:$b)>; 1347def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1348 (atomic_load_and_64 node:$a, node:$b)>; 1349def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1350 (atomic_load_and_64 node:$a, node:$b)>; 1351def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1352 (atomic_load_and_64 node:$a, node:$b)>; 1353 1354defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", 1355 atomic_load_and_32_g, i32imm, imm>; 1356defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and", 1357 atomic_load_and_32_s, i32imm, imm>; 1358defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", 1359 atomic_load_and_32_gen, i32imm, imm>; 1360defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1361 ".and", atomic_load_and_32_gen, i32imm, imm>; 1362defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", 1363 atomic_load_and_64_g, i64imm, imm>; 1364defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", 1365 atomic_load_and_64_s, i64imm, imm>; 1366defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", 1367 atomic_load_and_64_gen, i64imm, imm>; 1368defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1369 ".and", atomic_load_and_64_gen, i64imm, imm>; 1370 1371// atom_or 1372 1373def atomic_load_or_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1374 (atomic_load_or_32 node:$a, node:$b)>; 1375def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1376 (atomic_load_or_32 node:$a, node:$b)>; 1377def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1378 (atomic_load_or_32 node:$a, node:$b)>; 1379def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1380 (atomic_load_or_64 node:$a, node:$b)>; 1381def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1382 (atomic_load_or_64 node:$a, node:$b)>; 1383def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1384 (atomic_load_or_64 node:$a, node:$b)>; 1385 1386defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", 1387 atomic_load_or_32_g, i32imm, imm>; 1388defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or", 1389 atomic_load_or_32_gen, i32imm, imm>; 1390defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1391 ".or", atomic_load_or_32_gen, i32imm, imm>; 1392defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", 1393 atomic_load_or_32_s, i32imm, imm>; 1394defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", 1395 atomic_load_or_64_g, i64imm, imm>; 1396defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", 1397 atomic_load_or_64_gen, i64imm, imm>; 1398defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1399 ".or", atomic_load_or_64_gen, i64imm, imm>; 1400defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", 1401 atomic_load_or_64_s, i64imm, imm>; 1402 1403// atom_xor 1404 1405def atomic_load_xor_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1406 (atomic_load_xor_32 node:$a, node:$b)>; 1407def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1408 (atomic_load_xor_32 node:$a, node:$b)>; 1409def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1410 (atomic_load_xor_32 node:$a, node:$b)>; 1411def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 1412 (atomic_load_xor_64 node:$a, node:$b)>; 1413def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 1414 (atomic_load_xor_64 node:$a, node:$b)>; 1415def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 1416 (atomic_load_xor_64 node:$a, node:$b)>; 1417 1418defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", 1419 atomic_load_xor_32_g, i32imm, imm>; 1420defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor", 1421 atomic_load_xor_32_s, i32imm, imm>; 1422defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", 1423 atomic_load_xor_32_gen, i32imm, imm>; 1424defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", 1425 ".xor", atomic_load_xor_32_gen, i32imm, imm>; 1426defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", 1427 atomic_load_xor_64_g, i64imm, imm>; 1428defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", 1429 atomic_load_xor_64_s, i64imm, imm>; 1430defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", 1431 atomic_load_xor_64_gen, i64imm, imm>; 1432defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", 1433 ".xor", atomic_load_xor_64_gen, i64imm, imm>; 1434 1435// atom_cas 1436 1437def atomic_cmp_swap_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1438 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1439def atomic_cmp_swap_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1440 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1441def atomic_cmp_swap_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1442 (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>; 1443def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 1444 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1445def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1446 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1447def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1448 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1449 1450defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1451 atomic_cmp_swap_32_g, i32imm>; 1452defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1453 atomic_cmp_swap_32_s, i32imm>; 1454defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1455 atomic_cmp_swap_32_gen, i32imm>; 1456defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1457 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1458defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1459 atomic_cmp_swap_64_g, i64imm>; 1460defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1461 atomic_cmp_swap_64_s, i64imm>; 1462defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1463 atomic_cmp_swap_64_gen, i64imm>; 1464defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1465 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1466 1467// Support for scoped atomic operations. Matches 1468// int_nvvm_atomic_{op}_{space}_{type}_{scope} 1469// and converts it into the appropriate instruction. 1470// NOTE: not all possible combinations are implemented 1471// 'space' is limited to generic as it's the only one needed to support CUDA. 1472// 'scope' = 'gpu' is default and is handled by regular atomic instructions. 1473class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds, 1474 dag ins, dag Operands> 1475 : NVPTXInst<(outs regclass:$result), ins, 1476 AsmStr, 1477 [(set regclass:$result, Operands)]>, 1478 Requires<Preds>; 1479 1480// Define instruction variants for all addressing modes. 1481multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr, 1482 NVPTXRegClass regclass, Operand ImmType, 1483 SDNode Imm, ValueType ImmTy, 1484 list<Predicate> Preds> { 1485 let AddedComplexity = 1 in { 1486 def : ATOM23_impl<AsmStr, regclass, Preds, 1487 (ins Int32Regs:$src, regclass:$b), 1488 (Intr Int32Regs:$src, regclass:$b)>; 1489 def : ATOM23_impl<AsmStr, regclass, Preds, 1490 (ins Int64Regs:$src, regclass:$b), 1491 (Intr Int64Regs:$src, regclass:$b)>; 1492 } 1493 // tablegen can't infer argument types from Intrinsic (though it can 1494 // from Instruction) so we have to enforce specific type on 1495 // immediates via explicit cast to ImmTy. 1496 def : ATOM23_impl<AsmStr, regclass, Preds, 1497 (ins Int32Regs:$src, ImmType:$b), 1498 (Intr Int32Regs:$src, (ImmTy Imm:$b))>; 1499 def : ATOM23_impl<AsmStr, regclass, Preds, 1500 (ins Int64Regs:$src, ImmType:$b), 1501 (Intr Int64Regs:$src, (ImmTy Imm:$b))>; 1502} 1503 1504multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr, 1505 NVPTXRegClass regclass, Operand ImmType, 1506 SDNode Imm, ValueType ImmTy, 1507 list<Predicate> Preds> { 1508 // Variants for register/immediate permutations of $b and $c 1509 let AddedComplexity = 2 in { 1510 def : ATOM23_impl<AsmStr, regclass, Preds, 1511 (ins Int32Regs:$src, regclass:$b, regclass:$c), 1512 (Intr Int32Regs:$src, regclass:$b, regclass:$c)>; 1513 def : ATOM23_impl<AsmStr, regclass, Preds, 1514 (ins Int64Regs:$src, regclass:$b, regclass:$c), 1515 (Intr Int64Regs:$src, regclass:$b, regclass:$c)>; 1516 } 1517 let AddedComplexity = 1 in { 1518 def : ATOM23_impl<AsmStr, regclass, Preds, 1519 (ins Int32Regs:$src, ImmType:$b, regclass:$c), 1520 (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>; 1521 def : ATOM23_impl<AsmStr, regclass, Preds, 1522 (ins Int64Regs:$src, ImmType:$b, regclass:$c), 1523 (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>; 1524 def : ATOM23_impl<AsmStr, regclass, Preds, 1525 (ins Int32Regs:$src, regclass:$b, ImmType:$c), 1526 (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>; 1527 def : ATOM23_impl<AsmStr, regclass, Preds, 1528 (ins Int64Regs:$src, regclass:$b, ImmType:$c), 1529 (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>; 1530 } 1531 def : ATOM23_impl<AsmStr, regclass, Preds, 1532 (ins Int32Regs:$src, ImmType:$b, ImmType:$c), 1533 (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; 1534 def : ATOM23_impl<AsmStr, regclass, Preds, 1535 (ins Int64Regs:$src, ImmType:$b, ImmType:$c), 1536 (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>; 1537} 1538 1539// Constructs instrinsic name and instruction asm strings. 1540multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr, 1541 string ScopeStr, string SpaceStr, 1542 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, 1543 ValueType ImmTy, list<Predicate> Preds> { 1544 defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr) 1545 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr) 1546 # "." # OpStr # "." # TypeStr 1547 # " \t$result, [$src], $b;", 1548 !cast<Intrinsic>( 1549 "int_nvvm_atomic_" # OpStr 1550 # "_" # SpaceStr # "_" # IntTypeStr 1551 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)), 1552 regclass, ImmType, Imm, ImmTy, Preds>; 1553} 1554multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr, 1555 string ScopeStr, string SpaceStr, 1556 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, 1557 ValueType ImmTy, list<Predicate> Preds> { 1558 defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr) 1559 # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr) 1560 # "." # OpStr # "." # TypeStr 1561 # " \t$result, [$src], $b, $c;", 1562 !cast<Intrinsic>( 1563 "int_nvvm_atomic_" # OpStr 1564 # "_" # SpaceStr # "_" # IntTypeStr 1565 # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)), 1566 regclass, ImmType, Imm, ImmTy, Preds>; 1567} 1568 1569// Constructs variants for different address spaces. 1570// For now we only need variants for generic space pointers. 1571multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr, 1572 string ScopeStr, NVPTXRegClass regclass, Operand ImmType, 1573 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> { 1574 defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen", 1575 regclass, ImmType, Imm, ImmTy, Preds>; 1576} 1577multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr, 1578 string ScopeStr, NVPTXRegClass regclass, Operand ImmType, 1579 SDNode Imm, ValueType ImmTy, list<Predicate> Preds> { 1580 defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen", 1581 regclass, ImmType, Imm, ImmTy, Preds>; 1582} 1583 1584// Constructs variants for different scopes of atomic op. 1585multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr, 1586 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, 1587 ValueType ImmTy, list<Predicate> Preds> { 1588 // .gpu scope is default and is currently covered by existing 1589 // atomics w/o explicitly specified scope. 1590 defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta", 1591 regclass, ImmType, Imm, ImmTy, 1592 !listconcat(Preds,[hasAtomScope])>; 1593 defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys", 1594 regclass, ImmType, Imm, ImmTy, 1595 !listconcat(Preds,[hasAtomScope])>; 1596} 1597multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr, 1598 NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy, 1599 list<Predicate> Preds> { 1600 // No need to define ".gpu"-scoped atomics. They do the same thing 1601 // as the regular, non-scoped atomics defined elsewhere. 1602 defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta", 1603 regclass, ImmType, Imm, ImmTy, 1604 !listconcat(Preds,[hasAtomScope])>; 1605 defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys", 1606 regclass, ImmType, Imm, ImmTy, 1607 !listconcat(Preds,[hasAtomScope])>; 1608} 1609 1610// atom.add 1611multiclass ATOM2_add_impl<string OpStr> { 1612 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>; 1613 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>; 1614 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>; 1615 defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32, 1616 []>; 1617 defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64, 1618 [hasAtomAddF64]>; 1619} 1620 1621// atom.{and,or,xor} 1622multiclass ATOM2_bitwise_impl<string OpStr> { 1623 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>; 1624 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, 1625 [hasAtomBitwise64]>; 1626} 1627 1628// atom.exch 1629multiclass ATOM2_exch_impl<string OpStr> { 1630 defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>; 1631 defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>; 1632} 1633 1634// atom.{min,max} 1635multiclass ATOM2_minmax_impl<string OpStr> { 1636 defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>; 1637 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>; 1638 defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64, 1639 [hasAtomMinMax64]>; 1640 defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, 1641 [hasAtomMinMax64]>; 1642} 1643 1644// atom.{inc,dec} 1645multiclass ATOM2_incdec_impl<string OpStr> { 1646 defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>; 1647} 1648 1649// atom.cas 1650multiclass ATOM3_cas_impl<string OpStr> { 1651 defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>; 1652 defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>; 1653} 1654 1655defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">; 1656defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">; 1657defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">; 1658defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">; 1659defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">; 1660defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">; 1661defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">; 1662defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">; 1663defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">; 1664defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">; 1665 1666//----------------------------------- 1667// Support for ldu on sm_20 or later 1668//----------------------------------- 1669 1670// Don't annotate ldu instructions as mayLoad, as they load from memory that is 1671// read-only in a kernel. 1672 1673// Scalar 1674 1675multiclass LDU_G<string TyStr, NVPTXRegClass regclass> { 1676 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), 1677 !strconcat("ldu.global.", TyStr), 1678 []>, Requires<[hasLDU]>; 1679 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), 1680 !strconcat("ldu.global.", TyStr), 1681 []>, Requires<[hasLDU]>; 1682 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src), 1683 !strconcat("ldu.global.", TyStr), 1684 []>, Requires<[hasLDU]>; 1685 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), 1686 !strconcat("ldu.global.", TyStr), 1687 []>, Requires<[hasLDU]>; 1688 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), 1689 !strconcat("ldu.global.", TyStr), 1690 []>, Requires<[hasLDU]>; 1691} 1692 1693defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>; 1694defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; 1695defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; 1696defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; 1697defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>; 1698defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>; 1699defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; 1700defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; 1701defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; 1702defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; 1703 1704// vector 1705 1706// Elementized vector ldu 1707multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { 1708 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1709 (ins Int32Regs:$src), 1710 !strconcat("ldu.global.", TyStr), []>; 1711 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1712 (ins Int64Regs:$src), 1713 !strconcat("ldu.global.", TyStr), []>; 1714 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1715 (ins MEMri:$src), 1716 !strconcat("ldu.global.", TyStr), []>; 1717 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1718 (ins MEMri64:$src), 1719 !strconcat("ldu.global.", TyStr), []>; 1720 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1721 (ins imemAny:$src), 1722 !strconcat("ldu.global.", TyStr), []>; 1723} 1724 1725multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 1726 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1727 regclass:$dst4), (ins Int32Regs:$src), 1728 !strconcat("ldu.global.", TyStr), []>; 1729 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1730 regclass:$dst4), (ins Int64Regs:$src), 1731 !strconcat("ldu.global.", TyStr), []>; 1732 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1733 regclass:$dst4), (ins MEMri:$src), 1734 !strconcat("ldu.global.", TyStr), []>; 1735 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1736 regclass:$dst4), (ins MEMri64:$src), 1737 !strconcat("ldu.global.", TyStr), []>; 1738 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1739 regclass:$dst4), (ins imemAny:$src), 1740 !strconcat("ldu.global.", TyStr), []>; 1741} 1742 1743defm INT_PTX_LDU_G_v2i8_ELE 1744 : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1745defm INT_PTX_LDU_G_v2i16_ELE 1746 : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1747defm INT_PTX_LDU_G_v2i32_ELE 1748 : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; 1749defm INT_PTX_LDU_G_v2f16_ELE 1750 : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; 1751defm INT_PTX_LDU_G_v2f16x2_ELE 1752 : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; 1753defm INT_PTX_LDU_G_v2f32_ELE 1754 : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; 1755defm INT_PTX_LDU_G_v2i64_ELE 1756 : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; 1757defm INT_PTX_LDU_G_v2f64_ELE 1758 : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; 1759defm INT_PTX_LDU_G_v4i8_ELE 1760 : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1761defm INT_PTX_LDU_G_v4i16_ELE 1762 : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", 1763 Int16Regs>; 1764defm INT_PTX_LDU_G_v4i32_ELE 1765 : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", 1766 Int32Regs>; 1767defm INT_PTX_LDU_G_v4f16_ELE 1768 : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", 1769 Float16Regs>; 1770defm INT_PTX_LDU_G_v4f16x2_ELE 1771 : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", 1772 Float16x2Regs>; 1773defm INT_PTX_LDU_G_v4f32_ELE 1774 : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", 1775 Float32Regs>; 1776 1777 1778//----------------------------------- 1779// Support for ldg on sm_35 or later 1780//----------------------------------- 1781 1782// Don't annotate ld.global.nc as mayLoad, because these loads go through the 1783// non-coherent texture cache, and therefore the values read must be read-only 1784// during the lifetime of the kernel. 1785 1786multiclass LDG_G<string TyStr, NVPTXRegClass regclass> { 1787 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), 1788 !strconcat("ld.global.nc.", TyStr), 1789 []>, Requires<[hasLDG]>; 1790 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), 1791 !strconcat("ld.global.nc.", TyStr), 1792 []>, Requires<[hasLDG]>; 1793 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src), 1794 !strconcat("ld.global.nc.", TyStr), 1795 []>, Requires<[hasLDG]>; 1796 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), 1797 !strconcat("ld.global.nc.", TyStr), 1798 []>, Requires<[hasLDG]>; 1799 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), 1800 !strconcat("ld.global.nc.", TyStr), 1801 []>, Requires<[hasLDG]>; 1802} 1803 1804defm INT_PTX_LDG_GLOBAL_i8 1805 : LDG_G<"u8 \t$result, [$src];", Int16Regs>; 1806defm INT_PTX_LDG_GLOBAL_i16 1807 : LDG_G<"u16 \t$result, [$src];", Int16Regs>; 1808defm INT_PTX_LDG_GLOBAL_i32 1809 : LDG_G<"u32 \t$result, [$src];", Int32Regs>; 1810defm INT_PTX_LDG_GLOBAL_i64 1811 : LDG_G<"u64 \t$result, [$src];", Int64Regs>; 1812defm INT_PTX_LDG_GLOBAL_f16 1813 : LDG_G<"b16 \t$result, [$src];", Float16Regs>; 1814defm INT_PTX_LDG_GLOBAL_f16x2 1815 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>; 1816defm INT_PTX_LDG_GLOBAL_f32 1817 : LDG_G<"f32 \t$result, [$src];", Float32Regs>; 1818defm INT_PTX_LDG_GLOBAL_f64 1819 : LDG_G<"f64 \t$result, [$src];", Float64Regs>; 1820defm INT_PTX_LDG_GLOBAL_p32 1821 : LDG_G<"u32 \t$result, [$src];", Int32Regs>; 1822defm INT_PTX_LDG_GLOBAL_p64 1823 : LDG_G<"u64 \t$result, [$src];", Int64Regs>; 1824 1825// vector 1826 1827// Elementized vector ldg 1828multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> { 1829 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1830 (ins Int32Regs:$src), 1831 !strconcat("ld.global.nc.", TyStr), []>; 1832 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1833 (ins Int64Regs:$src), 1834 !strconcat("ld.global.nc.", TyStr), []>; 1835 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1836 (ins MEMri:$src), 1837 !strconcat("ld.global.nc.", TyStr), []>; 1838 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1839 (ins MEMri64:$src), 1840 !strconcat("ld.global.nc.", TyStr), []>; 1841 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2), 1842 (ins imemAny:$src), 1843 !strconcat("ld.global.nc.", TyStr), []>; 1844} 1845 1846multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> { 1847 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1848 regclass:$dst4), (ins Int32Regs:$src), 1849 !strconcat("ld.global.nc.", TyStr), []>; 1850 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1851 regclass:$dst4), (ins Int64Regs:$src), 1852 !strconcat("ld.global.nc.", TyStr), []>; 1853 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1854 regclass:$dst4), (ins MEMri:$src), 1855 !strconcat("ld.global.nc.", TyStr), []>; 1856 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1857 regclass:$dst4), (ins MEMri64:$src), 1858 !strconcat("ld.global.nc.", TyStr), []>; 1859 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, 1860 regclass:$dst4), (ins imemAny:$src), 1861 !strconcat("ld.global.nc.", TyStr), []>; 1862} 1863 1864// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads. 1865defm INT_PTX_LDG_G_v2i8_ELE 1866 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1867defm INT_PTX_LDG_G_v2i16_ELE 1868 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1869defm INT_PTX_LDG_G_v2i32_ELE 1870 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; 1871defm INT_PTX_LDG_G_v2f16_ELE 1872 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; 1873defm INT_PTX_LDG_G_v2f16x2_ELE 1874 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; 1875defm INT_PTX_LDG_G_v2f32_ELE 1876 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; 1877defm INT_PTX_LDG_G_v2i64_ELE 1878 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; 1879defm INT_PTX_LDG_G_v2f64_ELE 1880 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; 1881defm INT_PTX_LDG_G_v4i8_ELE 1882 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1883defm INT_PTX_LDG_G_v4i16_ELE 1884 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1885defm INT_PTX_LDG_G_v4i32_ELE 1886 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; 1887defm INT_PTX_LDG_G_v4f16_ELE 1888 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; 1889defm INT_PTX_LDG_G_v4f16x2_ELE 1890 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; 1891defm INT_PTX_LDG_G_v4f32_ELE 1892 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; 1893 1894 1895multiclass NG_TO_G<string Str, Intrinsic Intrin> { 1896 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1897 !strconcat("cvta.", Str, ".u32 \t$result, $src;"), 1898 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1899 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1900 !strconcat("cvta.", Str, ".u64 \t$result, $src;"), 1901 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; 1902 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), 1903 "{{ .reg .b64 %tmp;\n\t" 1904 #" cvt.u64.u32 \t%tmp, $src;\n\t" 1905 #" cvta." # Str # ".u64 \t$result, %tmp; }}", 1906 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, 1907 Requires<[useShortPtr]>; 1908} 1909 1910multiclass G_TO_NG<string Str, Intrinsic Intrin> { 1911 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1912 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), 1913 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1914 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1915 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), 1916 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; 1917 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), 1918 "{{ .reg .b64 %tmp;\n\t" 1919 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t" 1920 #" cvt.u32.u64 \t$result, %tmp; }}", 1921 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, 1922 Requires<[useShortPtr]>; 1923} 1924 1925defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; 1926defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; 1927defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; 1928defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; 1929 1930defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; 1931defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; 1932defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; 1933defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; 1934 1935 1936// nvvm.ptr.gen.to.param 1937def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), 1938 (ins Int32Regs:$src), 1939 "mov.u32 \t$result, $src;", 1940 [(set Int32Regs:$result, 1941 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; 1942def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), 1943 (ins Int64Regs:$src), 1944 "mov.u64 \t$result, $src;", 1945 [(set Int64Regs:$result, 1946 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; 1947 1948 1949// nvvm.move intrinsicc 1950def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), 1951 "mov.b16 \t$r, $s;", 1952 [(set Int16Regs:$r, 1953 (int_nvvm_move_i16 Int16Regs:$s))]>; 1954def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), 1955 "mov.b32 \t$r, $s;", 1956 [(set Int32Regs:$r, 1957 (int_nvvm_move_i32 Int32Regs:$s))]>; 1958def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), 1959 "mov.b64 \t$r, $s;", 1960 [(set Int64Regs:$r, 1961 (int_nvvm_move_i64 Int64Regs:$s))]>; 1962def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), 1963 "mov.f32 \t$r, $s;", 1964 [(set Float32Regs:$r, 1965 (int_nvvm_move_float Float32Regs:$s))]>; 1966def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), 1967 "mov.f64 \t$r, $s;", 1968 [(set Float64Regs:$r, 1969 (int_nvvm_move_double Float64Regs:$s))]>; 1970def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), 1971 "mov.u32 \t$r, $s;", 1972 [(set Int32Regs:$r, 1973 (int_nvvm_move_ptr Int32Regs:$s))]>; 1974def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), 1975 "mov.u64 \t$r, $s;", 1976 [(set Int64Regs:$r, 1977 (int_nvvm_move_ptr Int64Regs:$s))]>; 1978 1979// @TODO: Are these actually needed, or will we always just see symbols 1980// copied to registers first? 1981/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), 1982 "mov.u32 \t$r, $s;", 1983 [(set Int32Regs:$r, 1984 (int_nvvm_move_ptr texternalsym:$s))]>; 1985def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), 1986 "mov.u64 \t$r, $s;", 1987 [(set Int64Regs:$r, 1988 (int_nvvm_move_ptr texternalsym:$s))]>;*/ 1989 1990 1991// MoveParam %r1, param 1992// ptr_local_to_gen %r2, %r1 1993// ptr_gen_to_local %r3, %r2 1994// -> 1995// mov %r1, param 1996 1997// @TODO: Revisit this. There is a type 1998// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym 1999// instructions are not currently defined. However, we can use the ptr 2000// variants and the asm printer will do the right thing. 2001def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2002 (MoveParam texternalsym:$src)))), 2003 (nvvm_move_ptr64 texternalsym:$src)>; 2004def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2005 (MoveParam texternalsym:$src)))), 2006 (nvvm_move_ptr32 texternalsym:$src)>; 2007 2008def texsurf_handles 2009 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), 2010 "mov.u64 \t$result, $src;", []>; 2011 2012//----------------------------------- 2013// Compiler Error Warn 2014// - Just ignore them in codegen 2015//----------------------------------- 2016 2017def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2018 "// llvm.nvvm.compiler.warn()", 2019 [(int_nvvm_compiler_warn Int32Regs:$a)]>; 2020def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2021 "// llvm.nvvm.compiler.warn()", 2022 [(int_nvvm_compiler_warn Int64Regs:$a)]>; 2023def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2024 "// llvm.nvvm.compiler.error()", 2025 [(int_nvvm_compiler_error Int32Regs:$a)]>; 2026def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2027 "// llvm.nvvm.compiler.error()", 2028 [(int_nvvm_compiler_error Int64Regs:$a)]>; 2029 2030 2031// isspacep 2032 2033def ISSPACEP_CONST_32 2034 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2035 "isspacep.const \t$d, $a;", 2036 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>, 2037 Requires<[hasPTX31]>; 2038def ISSPACEP_CONST_64 2039 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2040 "isspacep.const \t$d, $a;", 2041 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>, 2042 Requires<[hasPTX31]>; 2043def ISSPACEP_GLOBAL_32 2044 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2045 "isspacep.global \t$d, $a;", 2046 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2047def ISSPACEP_GLOBAL_64 2048 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2049 "isspacep.global \t$d, $a;", 2050 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2051def ISSPACEP_LOCAL_32 2052 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2053 "isspacep.local \t$d, $a;", 2054 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2055def ISSPACEP_LOCAL_64 2056 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2057 "isspacep.local \t$d, $a;", 2058 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2059def ISSPACEP_SHARED_32 2060 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2061 "isspacep.shared \t$d, $a;", 2062 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2063def ISSPACEP_SHARED_64 2064 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2065 "isspacep.shared \t$d, $a;", 2066 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2067 2068 2069// Special register reads 2070def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2071 (ins SpecialRegs:$r), 2072 "mov.b32 \t$d, $r;", []>; 2073 2074def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2075def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2076def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2077def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2078def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2079def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2080def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2081def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2082def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2083def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2084def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 2085def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2086def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2087def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2088def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2089def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2090def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2091def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2092def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2093def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2094def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2095def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2096def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2097def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2098def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2099def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2100def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2101def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2102def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2103def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2104def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2105def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2106 2107 2108// rotate builtin support 2109 2110def ROTATE_B32_HW_IMM 2111 : NVPTXInst<(outs Int32Regs:$dst), 2112 (ins Int32Regs:$src, i32imm:$amt), 2113 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2114 [(set Int32Regs:$dst, 2115 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2116 Requires<[hasHWROT32]> ; 2117 2118def ROTATE_B32_HW_REG 2119 : NVPTXInst<(outs Int32Regs:$dst), 2120 (ins Int32Regs:$src, Int32Regs:$amt), 2121 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2122 [(set Int32Regs:$dst, 2123 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2124 Requires<[hasHWROT32]> ; 2125 2126def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2127 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2128 Requires<[noHWROT32]> ; 2129 2130def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2131 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2132 Requires<[noHWROT32]> ; 2133 2134let hasSideEffects = 0 in { 2135 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2136 !strconcat("{{\n\t", 2137 ".reg .b32 %dummy;\n\t", 2138 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2139 "}}"), 2140 []> ; 2141 2142 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2143 !strconcat("{{\n\t", 2144 ".reg .b32 %dummy;\n\t", 2145 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2146 "}}"), 2147 []> ; 2148} 2149 2150let hasSideEffects = 0 in { 2151 def PACK_TWO_INT32 2152 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2153 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2154} 2155 2156def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2157 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2158 (GET_LO_INT64 Int64Regs:$src))> ; 2159 2160// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2161// no side effects. 2162let hasSideEffects = 0 in { 2163 def SHF_L_WRAP_B32_IMM 2164 : NVPTXInst<(outs Int32Regs:$dst), 2165 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2166 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2167 Requires<[hasHWROT32]>; 2168 2169 def SHF_L_WRAP_B32_REG 2170 : NVPTXInst<(outs Int32Regs:$dst), 2171 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2172 "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2173 Requires<[hasHWROT32]>; 2174 2175 def SHF_R_WRAP_B32_IMM 2176 : NVPTXInst<(outs Int32Regs:$dst), 2177 (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), 2178 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2179 Requires<[hasHWROT32]>; 2180 2181 def SHF_R_WRAP_B32_REG 2182 : NVPTXInst<(outs Int32Regs:$dst), 2183 (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), 2184 "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, 2185 Requires<[hasHWROT32]>; 2186} 2187 2188// HW version of rotate 64 2189def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2190 (PACK_TWO_INT32 2191 (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2192 (GET_LO_INT64 Int64Regs:$src), imm:$amt), 2193 (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2194 (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, 2195 Requires<[hasHWROT32]>; 2196 2197def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2198 (PACK_TWO_INT32 2199 (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2200 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), 2201 (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2202 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2203 Requires<[hasHWROT32]>; 2204 2205 2206def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2207 (PACK_TWO_INT32 2208 (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), 2209 (GET_HI_INT64 Int64Regs:$src), imm:$amt), 2210 (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), 2211 (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, 2212 Requires<[hasHWROT32]>; 2213 2214def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2215 (PACK_TWO_INT32 2216 (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), 2217 (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), 2218 (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), 2219 (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, 2220 Requires<[hasHWROT32]>; 2221 2222// SW version of rotate 64 2223def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), 2224 (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2225 Requires<[noHWROT32]>; 2226def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), 2227 (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2228 Requires<[noHWROT32]>; 2229def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), 2230 (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, 2231 Requires<[noHWROT32]>; 2232def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), 2233 (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, 2234 Requires<[noHWROT32]>; 2235 2236 2237//----------------------------------- 2238// Texture Intrinsics 2239//----------------------------------- 2240 2241// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be 2242// also defined in NVPTXReplaceImageHandles.cpp 2243 2244// texmode_independent 2245let IsTex = 1, IsTexModeUnified = 0 in { 2246// Texture fetch instructions using handles 2247def TEX_1D_F32_S32 2248 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2249 Float32Regs:$b, Float32Regs:$a), 2250 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2251 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2252 []>; 2253def TEX_1D_F32_F32 2254 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2255 Float32Regs:$b, Float32Regs:$a), 2256 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2257 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2258 []>; 2259def TEX_1D_F32_F32_LEVEL 2260 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2261 Float32Regs:$b, Float32Regs:$a), 2262 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), 2263 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2264 "[$t, $s, \\{$x\\}], $lod;", 2265 []>; 2266def TEX_1D_F32_F32_GRAD 2267 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2268 Float32Regs:$b, Float32Regs:$a), 2269 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2270 Float32Regs:$gradx, Float32Regs:$grady), 2271 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2272 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2273 []>; 2274def TEX_1D_S32_S32 2275 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2276 Int32Regs:$b, Int32Regs:$a), 2277 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2278 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2279 []>; 2280def TEX_1D_S32_F32 2281 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2282 Int32Regs:$b, Int32Regs:$a), 2283 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2284 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2285 []>; 2286def TEX_1D_S32_F32_LEVEL 2287 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2288 Int32Regs:$b, Int32Regs:$a), 2289 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2290 Float32Regs:$lod), 2291 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2292 "[$t, $s, \\{$x\\}], $lod;", 2293 []>; 2294def TEX_1D_S32_F32_GRAD 2295 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2296 Int32Regs:$b, Int32Regs:$a), 2297 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2298 Float32Regs:$gradx, Float32Regs:$grady), 2299 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2300 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2301 []>; 2302def TEX_1D_U32_S32 2303 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2304 Int32Regs:$b, Int32Regs:$a), 2305 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2306 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2307 []>; 2308def TEX_1D_U32_F32 2309 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2310 Int32Regs:$b, Int32Regs:$a), 2311 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2312 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2313 []>; 2314def TEX_1D_U32_F32_LEVEL 2315 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2316 Int32Regs:$b, Int32Regs:$a), 2317 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2318 Float32Regs:$lod), 2319 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2320 "[$t, $s, \\{$x\\}], $lod;", 2321 []>; 2322def TEX_1D_U32_F32_GRAD 2323 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2324 Int32Regs:$b, Int32Regs:$a), 2325 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2326 Float32Regs:$gradx, Float32Regs:$grady), 2327 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2328 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2329 []>; 2330 2331def TEX_1D_ARRAY_F32_S32 2332 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2333 Float32Regs:$b, Float32Regs:$a), 2334 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2335 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2336 "[$t, $s, \\{$l, $x\\}];", 2337 []>; 2338def TEX_1D_ARRAY_F32_F32 2339 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2340 Float32Regs:$b, Float32Regs:$a), 2341 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2342 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2343 "[$t, $s, \\{$l, $x\\}];", 2344 []>; 2345def TEX_1D_ARRAY_F32_F32_LEVEL 2346 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2347 Float32Regs:$b, Float32Regs:$a), 2348 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2349 Float32Regs:$lod), 2350 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2351 "[$t, $s, \\{$l, $x\\}], $lod;", 2352 []>; 2353def TEX_1D_ARRAY_F32_F32_GRAD 2354 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2355 Float32Regs:$b, Float32Regs:$a), 2356 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2357 Float32Regs:$gradx, Float32Regs:$grady), 2358 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2359 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2360 []>; 2361def TEX_1D_ARRAY_S32_S32 2362 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2363 Int32Regs:$b, Int32Regs:$a), 2364 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2365 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2366 "[$t, $s, \\{$l, $x\\}];", 2367 []>; 2368def TEX_1D_ARRAY_S32_F32 2369 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2370 Int32Regs:$b, Int32Regs:$a), 2371 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2372 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2373 "[$t, $s, \\{$l, $x\\}];", 2374 []>; 2375def TEX_1D_ARRAY_S32_F32_LEVEL 2376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2377 Int32Regs:$b, Int32Regs:$a), 2378 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2379 Float32Regs:$lod), 2380 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2381 "[$t, $s, \\{$l, $x\\}], $lod;", 2382 []>; 2383def TEX_1D_ARRAY_S32_F32_GRAD 2384 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2385 Int32Regs:$b, Int32Regs:$a), 2386 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2387 Float32Regs:$gradx, Float32Regs:$grady), 2388 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2389 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2390 []>; 2391def TEX_1D_ARRAY_U32_S32 2392 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2393 Int32Regs:$b, Int32Regs:$a), 2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2395 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2396 "[$t, $s, \\{$l, $x\\}];", 2397 []>; 2398def TEX_1D_ARRAY_U32_F32 2399 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2400 Int32Regs:$b, Int32Regs:$a), 2401 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2402 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2403 "[$t, $s, \\{$l, $x\\}];", 2404 []>; 2405def TEX_1D_ARRAY_U32_F32_LEVEL 2406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2407 Int32Regs:$b, Int32Regs:$a), 2408 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2409 Float32Regs:$lod), 2410 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2411 "[$t, $s, \\{$l, $x\\}], $lod;", 2412 []>; 2413def TEX_1D_ARRAY_U32_F32_GRAD 2414 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2415 Int32Regs:$b, Int32Regs:$a), 2416 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2417 Float32Regs:$gradx, Float32Regs:$grady), 2418 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2419 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2420 []>; 2421 2422def TEX_2D_F32_S32 2423 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2424 Float32Regs:$b, Float32Regs:$a), 2425 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2426 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2427 "[$t, $s, \\{$x, $y\\}];", 2428 []>; 2429def TEX_2D_F32_F32 2430 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2431 Float32Regs:$b, Float32Regs:$a), 2432 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2433 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2434 "[$t, $s, \\{$x, $y\\}];", 2435 []>; 2436def TEX_2D_F32_F32_LEVEL 2437 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2438 Float32Regs:$b, Float32Regs:$a), 2439 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2440 Float32Regs:$lod), 2441 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2442 "[$t, $s, \\{$x, $y\\}], $lod;", 2443 []>; 2444def TEX_2D_F32_F32_GRAD 2445 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2446 Float32Regs:$b, Float32Regs:$a), 2447 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2448 Float32Regs:$gradx0, Float32Regs:$gradx1, 2449 Float32Regs:$grady0, Float32Regs:$grady1), 2450 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2451 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2452 "\\{$grady0, $grady1\\};", 2453 []>; 2454def TEX_2D_S32_S32 2455 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2456 Int32Regs:$b, Int32Regs:$a), 2457 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2458 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2459 "[$t, $s, \\{$x, $y\\}];", 2460 []>; 2461def TEX_2D_S32_F32 2462 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2463 Int32Regs:$b, Int32Regs:$a), 2464 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2465 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2466 "[$t, $s, \\{$x, $y\\}];", 2467 []>; 2468def TEX_2D_S32_F32_LEVEL 2469 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2470 Int32Regs:$b, Int32Regs:$a), 2471 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2472 Float32Regs:$lod), 2473 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2474 "[$t, $s, \\{$x, $y\\}], $lod;", 2475 []>; 2476def TEX_2D_S32_F32_GRAD 2477 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2478 Int32Regs:$b, Int32Regs:$a), 2479 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2480 Float32Regs:$gradx0, Float32Regs:$gradx1, 2481 Float32Regs:$grady0, Float32Regs:$grady1), 2482 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2483 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2484 "\\{$grady0, $grady1\\};", 2485 []>; 2486def TEX_2D_U32_S32 2487 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2488 Int32Regs:$b, Int32Regs:$a), 2489 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2490 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2491 "[$t, $s, \\{$x, $y\\}];", 2492 []>; 2493def TEX_2D_U32_F32 2494 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2495 Int32Regs:$b, Int32Regs:$a), 2496 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2497 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2498 "[$t, $s, \\{$x, $y\\}];", 2499 []>; 2500def TEX_2D_U32_F32_LEVEL 2501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2502 Int32Regs:$b, Int32Regs:$a), 2503 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2504 Float32Regs:$lod), 2505 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2506 "[$t, $s, \\{$x, $y\\}], $lod;", 2507 []>; 2508def TEX_2D_U32_F32_GRAD 2509 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2510 Int32Regs:$b, Int32Regs:$a), 2511 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2512 Float32Regs:$gradx0, Float32Regs:$gradx1, 2513 Float32Regs:$grady0, Float32Regs:$grady1), 2514 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2515 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2516 "\\{$grady0, $grady1\\};", 2517 []>; 2518 2519def TEX_2D_ARRAY_F32_S32 2520 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2521 Float32Regs:$b, Float32Regs:$a), 2522 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2523 Int32Regs:$y), 2524 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2525 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2526 []>; 2527def TEX_2D_ARRAY_F32_F32 2528 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2529 Float32Regs:$b, Float32Regs:$a), 2530 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2531 Float32Regs:$y), 2532 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2533 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2534 []>; 2535def TEX_2D_ARRAY_F32_F32_LEVEL 2536 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2537 Float32Regs:$b, Float32Regs:$a), 2538 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2539 Float32Regs:$y, Float32Regs:$lod), 2540 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2541 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2542 []>; 2543def TEX_2D_ARRAY_F32_F32_GRAD 2544 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2545 Float32Regs:$b, Float32Regs:$a), 2546 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2547 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 2548 Float32Regs:$grady0, Float32Regs:$grady1), 2549 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2550 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2551 "\\{$grady0, $grady1\\};", 2552 []>; 2553def TEX_2D_ARRAY_S32_S32 2554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2555 Int32Regs:$b, Int32Regs:$a), 2556 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2557 Int32Regs:$y), 2558 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2559 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2560 []>; 2561def TEX_2D_ARRAY_S32_F32 2562 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2563 Int32Regs:$b, Int32Regs:$a), 2564 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2565 Float32Regs:$y), 2566 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2567 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2568 []>; 2569def TEX_2D_ARRAY_S32_F32_LEVEL 2570 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2571 Int32Regs:$b, Int32Regs:$a), 2572 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2573 Float32Regs:$y, Float32Regs:$lod), 2574 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2575 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2576 []>; 2577def TEX_2D_ARRAY_S32_F32_GRAD 2578 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2579 Int32Regs:$b, Int32Regs:$a), 2580 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2581 Float32Regs:$y, 2582 Float32Regs:$gradx0, Float32Regs:$gradx1, 2583 Float32Regs:$grady0, Float32Regs:$grady1), 2584 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2585 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2586 "\\{$grady0, $grady1\\};", 2587 []>; 2588def TEX_2D_ARRAY_U32_S32 2589 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2590 Int32Regs:$b, Int32Regs:$a), 2591 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2592 Int32Regs:$y), 2593 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2594 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2595 []>; 2596def TEX_2D_ARRAY_U32_F32 2597 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2598 Int32Regs:$b, Int32Regs:$a), 2599 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2600 Float32Regs:$y), 2601 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2602 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2603 []>; 2604def TEX_2D_ARRAY_U32_F32_LEVEL 2605 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2606 Int32Regs:$b, Int32Regs:$a), 2607 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2608 Float32Regs:$y, Float32Regs:$lod), 2609 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2610 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2611 []>; 2612def TEX_2D_ARRAY_U32_F32_GRAD 2613 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2614 Int32Regs:$b, Int32Regs:$a), 2615 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2616 Float32Regs:$y, 2617 Float32Regs:$gradx0, Float32Regs:$gradx1, 2618 Float32Regs:$grady0, Float32Regs:$grady1), 2619 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2620 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2621 "\\{$grady0, $grady1\\};", 2622 []>; 2623 2624def TEX_3D_F32_S32 2625 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2626 Float32Regs:$b, Float32Regs:$a), 2627 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2628 Int32Regs:$z), 2629 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2630 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2631 []>; 2632def TEX_3D_F32_F32 2633 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2634 Float32Regs:$b, Float32Regs:$a), 2635 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2636 Float32Regs:$z), 2637 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2638 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2639 []>; 2640def TEX_3D_F32_F32_LEVEL 2641 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2642 Float32Regs:$b, Float32Regs:$a), 2643 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2644 Float32Regs:$z, Float32Regs:$lod), 2645 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2646 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2647 []>; 2648def TEX_3D_F32_F32_GRAD 2649 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2650 Float32Regs:$b, Float32Regs:$a), 2651 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2652 Float32Regs:$z, 2653 Float32Regs:$gradx0, Float32Regs:$gradx1, 2654 Float32Regs:$gradx2, Float32Regs:$grady0, 2655 Float32Regs:$grady1, Float32Regs:$grady2), 2656 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2657 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2658 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2659 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2660 []>; 2661def TEX_3D_S32_S32 2662 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2663 Int32Regs:$b, Int32Regs:$a), 2664 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2665 Int32Regs:$z), 2666 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2667 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2668 []>; 2669def TEX_3D_S32_F32 2670 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2671 Int32Regs:$b, Int32Regs:$a), 2672 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2673 Float32Regs:$z), 2674 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2675 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2676 []>; 2677def TEX_3D_S32_F32_LEVEL 2678 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2679 Int32Regs:$b, Int32Regs:$a), 2680 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2681 Float32Regs:$z, Float32Regs:$lod), 2682 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2683 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2684 []>; 2685def TEX_3D_S32_F32_GRAD 2686 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2687 Int32Regs:$b, Int32Regs:$a), 2688 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2689 Float32Regs:$z, 2690 Float32Regs:$gradx0, Float32Regs:$gradx1, 2691 Float32Regs:$gradx2, Float32Regs:$grady0, 2692 Float32Regs:$grady1, Float32Regs:$grady2), 2693 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2694 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2695 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2696 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2697 []>; 2698def TEX_3D_U32_S32 2699 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2700 Int32Regs:$b, Int32Regs:$a), 2701 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2702 Int32Regs:$z), 2703 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2704 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2705 []>; 2706def TEX_3D_U32_F32 2707 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2708 Int32Regs:$b, Int32Regs:$a), 2709 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2710 Float32Regs:$z), 2711 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2712 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2713 []>; 2714def TEX_3D_U32_F32_LEVEL 2715 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2716 Int32Regs:$b, Int32Regs:$a), 2717 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2718 Float32Regs:$z, Float32Regs:$lod), 2719 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2720 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2721 []>; 2722def TEX_3D_U32_F32_GRAD 2723 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2724 Int32Regs:$b, Int32Regs:$a), 2725 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2726 Float32Regs:$z, 2727 Float32Regs:$gradx0, Float32Regs:$gradx1, 2728 Float32Regs:$gradx2, Float32Regs:$grady0, 2729 Float32Regs:$grady1, Float32Regs:$grady2), 2730 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2731 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2732 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2733 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2734 []>; 2735 2736def TEX_CUBE_F32_F32 2737 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2738 Float32Regs:$b, Float32Regs:$a), 2739 (ins Int64Regs:$t, Int64Regs:$s, 2740 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2741 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2742 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2743 []>; 2744def TEX_CUBE_F32_F32_LEVEL 2745 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2746 Float32Regs:$b, Float32Regs:$a), 2747 (ins Int64Regs:$t, Int64Regs:$s, 2748 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2749 Float32Regs:$lod), 2750 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2751 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2752 []>; 2753def TEX_CUBE_S32_F32 2754 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2755 Int32Regs:$b, Int32Regs:$a), 2756 (ins Int64Regs:$t, Int64Regs:$s, 2757 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2758 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2759 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2760 []>; 2761def TEX_CUBE_S32_F32_LEVEL 2762 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2763 Int32Regs:$b, Int32Regs:$a), 2764 (ins Int64Regs:$t, Int64Regs:$s, 2765 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2766 Float32Regs:$lod), 2767 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2768 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2769 []>; 2770def TEX_CUBE_U32_F32 2771 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2772 Int32Regs:$b, Int32Regs:$a), 2773 (ins Int64Regs:$t, Int64Regs:$s, 2774 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2775 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2776 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2777 []>; 2778def TEX_CUBE_U32_F32_LEVEL 2779 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2780 Int32Regs:$b, Int32Regs:$a), 2781 (ins Int64Regs:$t, Int64Regs:$s, 2782 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2783 Float32Regs:$lod), 2784 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2785 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2786 []>; 2787 2788def TEX_CUBE_ARRAY_F32_F32 2789 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2790 Float32Regs:$b, Float32Regs:$a), 2791 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2792 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2793 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2794 "[$t, $s, \\{$l, $x, $y, $z\\}];", 2795 []>; 2796def TEX_CUBE_ARRAY_F32_F32_LEVEL 2797 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2798 Float32Regs:$b, Float32Regs:$a), 2799 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2800 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2801 Float32Regs:$lod), 2802 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2803 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 2804 []>; 2805def TEX_CUBE_ARRAY_S32_F32 2806 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2807 Int32Regs:$b, Int32Regs:$a), 2808 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2809 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2810 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2811 "[$t, $s, \\{$l, $x, $y, $z\\}];", 2812 []>; 2813def TEX_CUBE_ARRAY_S32_F32_LEVEL 2814 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2815 Int32Regs:$b, Int32Regs:$a), 2816 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2817 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2818 Float32Regs:$lod), 2819 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2820 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 2821 []>; 2822def TEX_CUBE_ARRAY_U32_F32 2823 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2824 Int32Regs:$b, Int32Regs:$a), 2825 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2826 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2827 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2828 "[$t, $s, \\{$l, $x, $y, $z\\}];", 2829 []>; 2830def TEX_CUBE_ARRAY_U32_F32_LEVEL 2831 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2832 Int32Regs:$b, Int32Regs:$a), 2833 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2834 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2835 Float32Regs:$lod), 2836 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2837 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 2838 []>; 2839 2840def TLD4_R_2D_F32_F32 2841 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 2842 Float32Regs:$v2, Float32Regs:$v3), 2843 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2844 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2845 "[$t, $s, \\{$x, $y\\}];", 2846 []>; 2847def TLD4_G_2D_F32_F32 2848 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 2849 Float32Regs:$v2, Float32Regs:$v3), 2850 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2851 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2852 "[$t, $s, \\{$x, $y\\}];", 2853 []>; 2854def TLD4_B_2D_F32_F32 2855 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 2856 Float32Regs:$v2, Float32Regs:$v3), 2857 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2858 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2859 "[$t, $s, \\{$x, $y\\}];", 2860 []>; 2861def TLD4_A_2D_F32_F32 2862 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 2863 Float32Regs:$v2, Float32Regs:$v3), 2864 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2865 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2866 "[$t, $s, \\{$x, $y\\}];", 2867 []>; 2868def TLD4_R_2D_S32_F32 2869 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2870 Int32Regs:$v2, Int32Regs:$v3), 2871 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2872 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2873 "[$t, $s, \\{$x, $y\\}];", 2874 []>; 2875def TLD4_G_2D_S32_F32 2876 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2877 Int32Regs:$v2, Int32Regs:$v3), 2878 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2879 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2880 "[$t, $s, \\{$x, $y\\}];", 2881 []>; 2882def TLD4_B_2D_S32_F32 2883 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2884 Int32Regs:$v2, Int32Regs:$v3), 2885 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2886 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2887 "[$t, $s, \\{$x, $y\\}];", 2888 []>; 2889def TLD4_A_2D_S32_F32 2890 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2891 Int32Regs:$v2, Int32Regs:$v3), 2892 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2893 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2894 "[$t, $s, \\{$x, $y\\}];", 2895 []>; 2896def TLD4_R_2D_U32_F32 2897 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2898 Int32Regs:$v2, Int32Regs:$v3), 2899 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2900 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2901 "[$t, $s, \\{$x, $y\\}];", 2902 []>; 2903def TLD4_G_2D_U32_F32 2904 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2905 Int32Regs:$v2, Int32Regs:$v3), 2906 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2907 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2908 "[$t, $s, \\{$x, $y\\}];", 2909 []>; 2910def TLD4_B_2D_U32_F32 2911 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2912 Int32Regs:$v2, Int32Regs:$v3), 2913 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2914 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2915 "[$t, $s, \\{$x, $y\\}];", 2916 []>; 2917def TLD4_A_2D_U32_F32 2918 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 2919 Int32Regs:$v2, Int32Regs:$v3), 2920 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2921 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 2922 "[$t, $s, \\{$x, $y\\}];", 2923 []>; 2924} 2925 2926 2927// texmode_unified 2928let IsTex = 1, IsTexModeUnified = 1 in { 2929// Texture fetch instructions using handles 2930def TEX_UNIFIED_1D_F32_S32 2931 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2932 Float32Regs:$b, Float32Regs:$a), 2933 (ins Int64Regs:$t, Int32Regs:$x), 2934 "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2935 []>; 2936def TEX_UNIFIED_1D_F32_F32 2937 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2938 Float32Regs:$b, Float32Regs:$a), 2939 (ins Int64Regs:$t, Float32Regs:$x), 2940 "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2941 []>; 2942def TEX_UNIFIED_1D_F32_F32_LEVEL 2943 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2944 Float32Regs:$b, Float32Regs:$a), 2945 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod), 2946 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2947 "[$t, \\{$x\\}], $lod;", 2948 []>; 2949def TEX_UNIFIED_1D_F32_F32_GRAD 2950 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2951 Float32Regs:$b, Float32Regs:$a), 2952 (ins Int64Regs:$t, Float32Regs:$x, 2953 Float32Regs:$gradx, Float32Regs:$grady), 2954 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2955 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2956 []>; 2957def TEX_UNIFIED_1D_S32_S32 2958 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2959 Int32Regs:$b, Int32Regs:$a), 2960 (ins Int64Regs:$t, Int32Regs:$x), 2961 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2962 []>; 2963def TEX_UNIFIED_1D_S32_F32 2964 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2965 Int32Regs:$b, Int32Regs:$a), 2966 (ins Int64Regs:$t, Float32Regs:$x), 2967 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2968 []>; 2969def TEX_UNIFIED_1D_S32_F32_LEVEL 2970 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2971 Int32Regs:$b, Int32Regs:$a), 2972 (ins Int64Regs:$t, Float32Regs:$x, 2973 Float32Regs:$lod), 2974 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2975 "[$t, \\{$x\\}], $lod;", 2976 []>; 2977def TEX_UNIFIED_1D_S32_F32_GRAD 2978 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2979 Int32Regs:$b, Int32Regs:$a), 2980 (ins Int64Regs:$t, Float32Regs:$x, 2981 Float32Regs:$gradx, Float32Regs:$grady), 2982 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2983 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2984 []>; 2985def TEX_UNIFIED_1D_U32_S32 2986 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2987 Int32Regs:$b, Int32Regs:$a), 2988 (ins Int64Regs:$t, Int32Regs:$x), 2989 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2990 []>; 2991def TEX_UNIFIED_1D_U32_F32 2992 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2993 Int32Regs:$b, Int32Regs:$a), 2994 (ins Int64Regs:$t, Float32Regs:$x), 2995 "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];", 2996 []>; 2997def TEX_UNIFIED_1D_U32_F32_LEVEL 2998 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2999 Int32Regs:$b, Int32Regs:$a), 3000 (ins Int64Regs:$t, Float32Regs:$x, 3001 Float32Regs:$lod), 3002 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3003 "[$t, \\{$x\\}], $lod;", 3004 []>; 3005def TEX_UNIFIED_1D_U32_F32_GRAD 3006 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3007 Int32Regs:$b, Int32Regs:$a), 3008 (ins Int64Regs:$t, Float32Regs:$x, 3009 Float32Regs:$gradx, Float32Regs:$grady), 3010 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3011 "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3012 []>; 3013 3014def TEX_UNIFIED_1D_ARRAY_F32_S32 3015 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3016 Float32Regs:$b, Float32Regs:$a), 3017 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3018 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3019 "[$t, \\{$l, $x\\}];", 3020 []>; 3021def TEX_UNIFIED_1D_ARRAY_F32_F32 3022 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3023 Float32Regs:$b, Float32Regs:$a), 3024 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3025 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3026 "[$t, \\{$l, $x\\}];", 3027 []>; 3028def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL 3029 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3030 Float32Regs:$b, Float32Regs:$a), 3031 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3032 Float32Regs:$lod), 3033 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3034 "[$t, \\{$l, $x\\}], $lod;", 3035 []>; 3036def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD 3037 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3038 Float32Regs:$b, Float32Regs:$a), 3039 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3040 Float32Regs:$gradx, Float32Regs:$grady), 3041 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3042 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3043 []>; 3044def TEX_UNIFIED_1D_ARRAY_S32_S32 3045 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3046 Int32Regs:$b, Int32Regs:$a), 3047 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3048 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3049 "[$t, \\{$l, $x\\}];", 3050 []>; 3051def TEX_UNIFIED_1D_ARRAY_S32_F32 3052 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3053 Int32Regs:$b, Int32Regs:$a), 3054 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3055 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3056 "[$t, \\{$l, $x\\}];", 3057 []>; 3058def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL 3059 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3060 Int32Regs:$b, Int32Regs:$a), 3061 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3062 Float32Regs:$lod), 3063 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3064 "[$t, \\{$l, $x\\}], $lod;", 3065 []>; 3066def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3067 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3068 Int32Regs:$b, Int32Regs:$a), 3069 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3070 Float32Regs:$gradx, Float32Regs:$grady), 3071 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3072 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3073 []>; 3074def TEX_UNIFIED_1D_ARRAY_U32_S32 3075 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3076 Int32Regs:$b, Int32Regs:$a), 3077 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x), 3078 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3079 "[$t, \\{$l, $x\\}];", 3080 []>; 3081def TEX_UNIFIED_1D_ARRAY_U32_F32 3082 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3083 Int32Regs:$b, Int32Regs:$a), 3084 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x), 3085 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3086 "[$t, \\{$l, $x\\}];", 3087 []>; 3088def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3089 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3090 Int32Regs:$b, Int32Regs:$a), 3091 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3092 Float32Regs:$lod), 3093 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3094 "[$t, \\{$l, $x\\}], $lod;", 3095 []>; 3096def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3097 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3098 Int32Regs:$b, Int32Regs:$a), 3099 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3100 Float32Regs:$gradx, Float32Regs:$grady), 3101 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3102 "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3103 []>; 3104 3105def TEX_UNIFIED_2D_F32_S32 3106 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3107 Float32Regs:$b, Float32Regs:$a), 3108 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3109 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3110 "[$t, \\{$x, $y\\}];", 3111 []>; 3112def TEX_UNIFIED_2D_F32_F32 3113 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3114 Float32Regs:$b, Float32Regs:$a), 3115 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3116 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3117 "[$t, \\{$x, $y\\}];", 3118 []>; 3119def TEX_UNIFIED_2D_F32_F32_LEVEL 3120 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3121 Float32Regs:$b, Float32Regs:$a), 3122 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3123 Float32Regs:$lod), 3124 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3125 "[$t, \\{$x, $y\\}], $lod;", 3126 []>; 3127def TEX_UNIFIED_2D_F32_F32_GRAD 3128 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3129 Float32Regs:$b, Float32Regs:$a), 3130 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3131 Float32Regs:$gradx0, Float32Regs:$gradx1, 3132 Float32Regs:$grady0, Float32Regs:$grady1), 3133 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3134 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3135 "\\{$grady0, $grady1\\};", 3136 []>; 3137def TEX_UNIFIED_2D_S32_S32 3138 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3139 Int32Regs:$b, Int32Regs:$a), 3140 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3141 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3142 "[$t, \\{$x, $y\\}];", 3143 []>; 3144def TEX_UNIFIED_2D_S32_F32 3145 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3146 Int32Regs:$b, Int32Regs:$a), 3147 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3148 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3149 "[$t, \\{$x, $y\\}];", 3150 []>; 3151def TEX_UNIFIED_2D_S32_F32_LEVEL 3152 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3153 Int32Regs:$b, Int32Regs:$a), 3154 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3155 Float32Regs:$lod), 3156 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3157 "[$t, \\{$x, $y\\}], $lod;", 3158 []>; 3159def TEX_UNIFIED_2D_S32_F32_GRAD 3160 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3161 Int32Regs:$b, Int32Regs:$a), 3162 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3163 Float32Regs:$gradx0, Float32Regs:$gradx1, 3164 Float32Regs:$grady0, Float32Regs:$grady1), 3165 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3166 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3167 "\\{$grady0, $grady1\\};", 3168 []>; 3169def TEX_UNIFIED_2D_U32_S32 3170 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3171 Int32Regs:$b, Int32Regs:$a), 3172 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y), 3173 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3174 "[$t, \\{$x, $y\\}];", 3175 []>; 3176def TEX_UNIFIED_2D_U32_F32 3177 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3178 Int32Regs:$b, Int32Regs:$a), 3179 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3180 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3181 "[$t, \\{$x, $y\\}];", 3182 []>; 3183def TEX_UNIFIED_2D_U32_F32_LEVEL 3184 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3185 Int32Regs:$b, Int32Regs:$a), 3186 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3187 Float32Regs:$lod), 3188 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3189 "[$t, \\{$x, $y\\}], $lod;", 3190 []>; 3191def TEX_UNIFIED_2D_U32_F32_GRAD 3192 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3193 Int32Regs:$b, Int32Regs:$a), 3194 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3195 Float32Regs:$gradx0, Float32Regs:$gradx1, 3196 Float32Regs:$grady0, Float32Regs:$grady1), 3197 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3198 "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 3199 "\\{$grady0, $grady1\\};", 3200 []>; 3201 3202def TEX_UNIFIED_2D_ARRAY_F32_S32 3203 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3204 Float32Regs:$b, Float32Regs:$a), 3205 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3206 Int32Regs:$y), 3207 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3208 "[$t, \\{$l, $x, $y, $y\\}];", 3209 []>; 3210def TEX_UNIFIED_2D_ARRAY_F32_F32 3211 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3212 Float32Regs:$b, Float32Regs:$a), 3213 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3214 Float32Regs:$y), 3215 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3216 "[$t, \\{$l, $x, $y, $y\\}];", 3217 []>; 3218def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL 3219 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3220 Float32Regs:$b, Float32Regs:$a), 3221 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3222 Float32Regs:$y, Float32Regs:$lod), 3223 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3224 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3225 []>; 3226def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD 3227 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3228 Float32Regs:$b, Float32Regs:$a), 3229 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3230 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 3231 Float32Regs:$grady0, Float32Regs:$grady1), 3232 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3233 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3234 "\\{$grady0, $grady1\\};", 3235 []>; 3236def TEX_UNIFIED_2D_ARRAY_S32_S32 3237 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3238 Int32Regs:$b, Int32Regs:$a), 3239 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3240 Int32Regs:$y), 3241 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3242 "[$t, \\{$l, $x, $y, $y\\}];", 3243 []>; 3244def TEX_UNIFIED_2D_ARRAY_S32_F32 3245 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3246 Int32Regs:$b, Int32Regs:$a), 3247 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3248 Float32Regs:$y), 3249 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3250 "[$t, \\{$l, $x, $y, $y\\}];", 3251 []>; 3252def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL 3253 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3254 Int32Regs:$b, Int32Regs:$a), 3255 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3256 Float32Regs:$y, Float32Regs:$lod), 3257 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3258 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3259 []>; 3260def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD 3261 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3262 Int32Regs:$b, Int32Regs:$a), 3263 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3264 Float32Regs:$y, 3265 Float32Regs:$gradx0, Float32Regs:$gradx1, 3266 Float32Regs:$grady0, Float32Regs:$grady1), 3267 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3268 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3269 "\\{$grady0, $grady1\\};", 3270 []>; 3271def TEX_UNIFIED_2D_ARRAY_U32_S32 3272 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3273 Int32Regs:$b, Int32Regs:$a), 3274 (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x, 3275 Int32Regs:$y), 3276 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3277 "[$t, \\{$l, $x, $y, $y\\}];", 3278 []>; 3279def TEX_UNIFIED_2D_ARRAY_U32_F32 3280 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3281 Int32Regs:$b, Int32Regs:$a), 3282 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3283 Float32Regs:$y), 3284 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3285 "[$t, \\{$l, $x, $y, $y\\}];", 3286 []>; 3287def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 3288 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3289 Int32Regs:$b, Int32Regs:$a), 3290 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3291 Float32Regs:$y, Float32Regs:$lod), 3292 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3293 "[$t, \\{$l, $x, $y, $y\\}], $lod;", 3294 []>; 3295def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 3296 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3297 Int32Regs:$b, Int32Regs:$a), 3298 (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x, 3299 Float32Regs:$y, 3300 Float32Regs:$gradx0, Float32Regs:$gradx1, 3301 Float32Regs:$grady0, Float32Regs:$grady1), 3302 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3303 "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 3304 "\\{$grady0, $grady1\\};", 3305 []>; 3306 3307def TEX_UNIFIED_3D_F32_S32 3308 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3309 Float32Regs:$b, Float32Regs:$a), 3310 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3311 Int32Regs:$z), 3312 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 3313 "[$t, \\{$x, $y, $z, $z\\}];", 3314 []>; 3315def TEX_UNIFIED_3D_F32_F32 3316 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3317 Float32Regs:$b, Float32Regs:$a), 3318 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3319 Float32Regs:$z), 3320 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3321 "[$t, \\{$x, $y, $z, $z\\}];", 3322 []>; 3323def TEX_UNIFIED_3D_F32_F32_LEVEL 3324 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3325 Float32Regs:$b, Float32Regs:$a), 3326 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3327 Float32Regs:$z, Float32Regs:$lod), 3328 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3329 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3330 []>; 3331def TEX_UNIFIED_3D_F32_F32_GRAD 3332 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3333 Float32Regs:$b, Float32Regs:$a), 3334 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3335 Float32Regs:$z, 3336 Float32Regs:$gradx0, Float32Regs:$gradx1, 3337 Float32Regs:$gradx2, Float32Regs:$grady0, 3338 Float32Regs:$grady1, Float32Regs:$grady2), 3339 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3340 "[$t, \\{$x, $y, $z, $z\\}], " 3341 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3342 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3343 []>; 3344def TEX_UNIFIED_3D_S32_S32 3345 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3346 Int32Regs:$b, Int32Regs:$a), 3347 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3348 Int32Regs:$z), 3349 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 3350 "[$t, \\{$x, $y, $z, $z\\}];", 3351 []>; 3352def TEX_UNIFIED_3D_S32_F32 3353 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3354 Int32Regs:$b, Int32Regs:$a), 3355 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3356 Float32Regs:$z), 3357 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3358 "[$t, \\{$x, $y, $z, $z\\}];", 3359 []>; 3360def TEX_UNIFIED_3D_S32_F32_LEVEL 3361 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3362 Int32Regs:$b, Int32Regs:$a), 3363 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3364 Float32Regs:$z, Float32Regs:$lod), 3365 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3366 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3367 []>; 3368def TEX_UNIFIED_3D_S32_F32_GRAD 3369 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3370 Int32Regs:$b, Int32Regs:$a), 3371 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3372 Float32Regs:$z, 3373 Float32Regs:$gradx0, Float32Regs:$gradx1, 3374 Float32Regs:$gradx2, Float32Regs:$grady0, 3375 Float32Regs:$grady1, Float32Regs:$grady2), 3376 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3377 "[$t, \\{$x, $y, $z, $z\\}], " 3378 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3379 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3380 []>; 3381def TEX_UNIFIED_3D_U32_S32 3382 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3383 Int32Regs:$b, Int32Regs:$a), 3384 (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y, 3385 Int32Regs:$z), 3386 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 3387 "[$t, \\{$x, $y, $z, $z\\}];", 3388 []>; 3389def TEX_UNIFIED_3D_U32_F32 3390 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3391 Int32Regs:$b, Int32Regs:$a), 3392 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3393 Float32Regs:$z), 3394 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3395 "[$t, \\{$x, $y, $z, $z\\}];", 3396 []>; 3397def TEX_UNIFIED_3D_U32_F32_LEVEL 3398 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3399 Int32Regs:$b, Int32Regs:$a), 3400 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3401 Float32Regs:$z, Float32Regs:$lod), 3402 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3403 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3404 []>; 3405def TEX_UNIFIED_3D_U32_F32_GRAD 3406 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3407 Int32Regs:$b, Int32Regs:$a), 3408 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y, 3409 Float32Regs:$z, 3410 Float32Regs:$gradx0, Float32Regs:$gradx1, 3411 Float32Regs:$gradx2, Float32Regs:$grady0, 3412 Float32Regs:$grady1, Float32Regs:$grady2), 3413 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3414 "[$t, \\{$x, $y, $z, $z\\}], " 3415 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 3416 "\\{$grady0, $grady1, $grady2, $grady2\\};", 3417 []>; 3418 3419def TEX_UNIFIED_CUBE_F32_F32 3420 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3421 Float32Regs:$b, Float32Regs:$a), 3422 (ins Int64Regs:$t, 3423 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3424 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3425 "[$t, \\{$x, $y, $z, $z\\}];", 3426 []>; 3427def TEX_UNIFIED_CUBE_F32_F32_LEVEL 3428 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3429 Float32Regs:$b, Float32Regs:$a), 3430 (ins Int64Regs:$t, 3431 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3432 Float32Regs:$lod), 3433 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3434 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3435 []>; 3436def TEX_UNIFIED_CUBE_S32_F32 3437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3438 Int32Regs:$b, Int32Regs:$a), 3439 (ins Int64Regs:$t, 3440 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3441 "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3442 "[$t, \\{$x, $y, $z, $z\\}];", 3443 []>; 3444def TEX_UNIFIED_CUBE_S32_F32_LEVEL 3445 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3446 Int32Regs:$b, Int32Regs:$a), 3447 (ins Int64Regs:$t, 3448 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3449 Float32Regs:$lod), 3450 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3451 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3452 []>; 3453def TEX_UNIFIED_CUBE_U32_F32 3454 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3455 Int32Regs:$b, Int32Regs:$a), 3456 (ins Int64Regs:$t, 3457 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3458 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3459 "[$t, \\{$x, $y, $z, $z\\}];", 3460 []>; 3461def TEX_UNIFIED_CUBE_U32_F32_LEVEL 3462 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3463 Int32Regs:$b, Int32Regs:$a), 3464 (ins Int64Regs:$t, 3465 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3466 Float32Regs:$lod), 3467 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3468 "[$t, \\{$x, $y, $z, $z\\}], $lod;", 3469 []>; 3470 3471def TEX_UNIFIED_CUBE_ARRAY_F32_F32 3472 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3473 Float32Regs:$b, Float32Regs:$a), 3474 (ins Int64Regs:$t, Int32Regs:$l, 3475 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3476 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3477 "[$t, \\{$l, $x, $y, $z\\}];", 3478 []>; 3479def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 3480 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 3481 Float32Regs:$b, Float32Regs:$a), 3482 (ins Int64Regs:$t, Int32Regs:$l, 3483 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3484 Float32Regs:$lod), 3485 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 3486 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3487 []>; 3488def TEX_UNIFIED_CUBE_ARRAY_S32_F32 3489 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3490 Int32Regs:$b, Int32Regs:$a), 3491 (ins Int64Regs:$t, Int32Regs:$l, 3492 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3493 "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3494 "[$t, \\{$l, $x, $y, $z\\}];", 3495 []>; 3496def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 3497 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3498 Int32Regs:$b, Int32Regs:$a), 3499 (ins Int64Regs:$t, Int32Regs:$l, 3500 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3501 Float32Regs:$lod), 3502 "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 3503 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3504 []>; 3505def TEX_UNIFIED_CUBE_ARRAY_U32_F32 3506 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3507 Int32Regs:$b, Int32Regs:$a), 3508 (ins Int64Regs:$t, Int32Regs:$l, 3509 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 3510 "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3511 "[$t, \\{$l, $x, $y, $z\\}];", 3512 []>; 3513def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 3514 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 3515 Int32Regs:$b, Int32Regs:$a), 3516 (ins Int64Regs:$t, Int32Regs:$l, 3517 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 3518 Float32Regs:$lod), 3519 "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 3520 "[$t, \\{$l, $x, $y, $z\\}], $lod;", 3521 []>; 3522 3523def TLD4_UNIFIED_R_2D_F32_F32 3524 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3525 Float32Regs:$v2, Float32Regs:$v3), 3526 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3527 "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3528 "[$t, \\{$x, $y\\}];", 3529 []>; 3530def TLD4_UNIFIED_G_2D_F32_F32 3531 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3532 Float32Regs:$v2, Float32Regs:$v3), 3533 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3534 "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3535 "[$t, \\{$x, $y\\}];", 3536 []>; 3537def TLD4_UNIFIED_B_2D_F32_F32 3538 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3539 Float32Regs:$v2, Float32Regs:$v3), 3540 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3541 "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3542 "[$t, \\{$x, $y\\}];", 3543 []>; 3544def TLD4_UNIFIED_A_2D_F32_F32 3545 : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1, 3546 Float32Regs:$v2, Float32Regs:$v3), 3547 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3548 "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3549 "[$t, \\{$x, $y\\}];", 3550 []>; 3551def TLD4_UNIFIED_R_2D_S32_F32 3552 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3553 Int32Regs:$v2, Int32Regs:$v3), 3554 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3555 "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3556 "[$t, \\{$x, $y\\}];", 3557 []>; 3558def TLD4_UNIFIED_G_2D_S32_F32 3559 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3560 Int32Regs:$v2, Int32Regs:$v3), 3561 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3562 "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3563 "[$t, \\{$x, $y\\}];", 3564 []>; 3565def TLD4_UNIFIED_B_2D_S32_F32 3566 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3567 Int32Regs:$v2, Int32Regs:$v3), 3568 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3569 "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3570 "[$t, \\{$x, $y\\}];", 3571 []>; 3572def TLD4_UNIFIED_A_2D_S32_F32 3573 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3574 Int32Regs:$v2, Int32Regs:$v3), 3575 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3576 "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3577 "[$t, \\{$x, $y\\}];", 3578 []>; 3579def TLD4_UNIFIED_R_2D_U32_F32 3580 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3581 Int32Regs:$v2, Int32Regs:$v3), 3582 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3583 "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3584 "[$t, \\{$x, $y\\}];", 3585 []>; 3586def TLD4_UNIFIED_G_2D_U32_F32 3587 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3588 Int32Regs:$v2, Int32Regs:$v3), 3589 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3590 "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3591 "[$t, \\{$x, $y\\}];", 3592 []>; 3593def TLD4_UNIFIED_B_2D_U32_F32 3594 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3595 Int32Regs:$v2, Int32Regs:$v3), 3596 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3597 "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3598 "[$t, \\{$x, $y\\}];", 3599 []>; 3600def TLD4_UNIFIED_A_2D_U32_F32 3601 : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1, 3602 Int32Regs:$v2, Int32Regs:$v3), 3603 (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y), 3604 "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, " 3605 "[$t, \\{$x, $y\\}];", 3606 []>; 3607} 3608 3609 3610 3611//=== Surface load instructions 3612// .clamp variant 3613let IsSuld = 1 in { 3614def SULD_1D_I8_CLAMP 3615 : NVPTXInst<(outs Int16Regs:$r), 3616 (ins Int64Regs:$s, Int32Regs:$x), 3617 "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];", 3618 []>; 3619def SULD_1D_I16_CLAMP 3620 : NVPTXInst<(outs Int16Regs:$r), 3621 (ins Int64Regs:$s, Int32Regs:$x), 3622 "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];", 3623 []>; 3624def SULD_1D_I32_CLAMP 3625 : NVPTXInst<(outs Int32Regs:$r), 3626 (ins Int64Regs:$s, Int32Regs:$x), 3627 "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];", 3628 []>; 3629def SULD_1D_I64_CLAMP 3630 : NVPTXInst<(outs Int64Regs:$r), 3631 (ins Int64Regs:$s, Int32Regs:$x), 3632 "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];", 3633 []>; 3634 3635def SULD_1D_ARRAY_I8_CLAMP 3636 : NVPTXInst<(outs Int16Regs:$r), 3637 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3638 "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3639 []>; 3640def SULD_1D_ARRAY_I16_CLAMP 3641 : NVPTXInst<(outs Int16Regs:$r), 3642 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3643 "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3644 []>; 3645def SULD_1D_ARRAY_I32_CLAMP 3646 : NVPTXInst<(outs Int32Regs:$r), 3647 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3648 "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3649 []>; 3650def SULD_1D_ARRAY_I64_CLAMP 3651 : NVPTXInst<(outs Int64Regs:$r), 3652 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3653 "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];", 3654 []>; 3655 3656def SULD_2D_I8_CLAMP 3657 : NVPTXInst<(outs Int16Regs:$r), 3658 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3659 "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3660 []>; 3661def SULD_2D_I16_CLAMP 3662 : NVPTXInst<(outs Int16Regs:$r), 3663 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3664 "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3665 []>; 3666def SULD_2D_I32_CLAMP 3667 : NVPTXInst<(outs Int32Regs:$r), 3668 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3669 "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3670 []>; 3671def SULD_2D_I64_CLAMP 3672 : NVPTXInst<(outs Int64Regs:$r), 3673 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3674 "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];", 3675 []>; 3676 3677def SULD_2D_ARRAY_I8_CLAMP 3678 : NVPTXInst<(outs Int16Regs:$r), 3679 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3680 "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3681 []>; 3682def SULD_2D_ARRAY_I16_CLAMP 3683 : NVPTXInst<(outs Int16Regs:$r), 3684 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3685 "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3686 []>; 3687def SULD_2D_ARRAY_I32_CLAMP 3688 : NVPTXInst<(outs Int32Regs:$r), 3689 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3690 "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3691 []>; 3692def SULD_2D_ARRAY_I64_CLAMP 3693 : NVPTXInst<(outs Int64Regs:$r), 3694 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3695 "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3696 []>; 3697 3698def SULD_3D_I8_CLAMP 3699 : NVPTXInst<(outs Int16Regs:$r), 3700 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3701 "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3702 []>; 3703def SULD_3D_I16_CLAMP 3704 : NVPTXInst<(outs Int16Regs:$r), 3705 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3706 "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3707 []>; 3708def SULD_3D_I32_CLAMP 3709 : NVPTXInst<(outs Int32Regs:$r), 3710 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3711 "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3712 []>; 3713def SULD_3D_I64_CLAMP 3714 : NVPTXInst<(outs Int64Regs:$r), 3715 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3716 "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 3717 []>; 3718} 3719 3720let IsSuld = 2 in { 3721def SULD_1D_V2I8_CLAMP 3722 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3723 (ins Int64Regs:$s, Int32Regs:$x), 3724 "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3725 []>; 3726def SULD_1D_V2I16_CLAMP 3727 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3728 (ins Int64Regs:$s, Int32Regs:$x), 3729 "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3730 []>; 3731def SULD_1D_V2I32_CLAMP 3732 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3733 (ins Int64Regs:$s, Int32Regs:$x), 3734 "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3735 []>; 3736def SULD_1D_V2I64_CLAMP 3737 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3738 (ins Int64Regs:$s, Int32Regs:$x), 3739 "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];", 3740 []>; 3741 3742def SULD_1D_ARRAY_V2I8_CLAMP 3743 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3744 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3745 "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3746 []>; 3747def SULD_1D_ARRAY_V2I16_CLAMP 3748 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3749 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3750 "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3751 []>; 3752def SULD_1D_ARRAY_V2I32_CLAMP 3753 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3754 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3755 "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3756 []>; 3757def SULD_1D_ARRAY_V2I64_CLAMP 3758 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3759 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3760 "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 3761 []>; 3762 3763def SULD_2D_V2I8_CLAMP 3764 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3765 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3766 "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3767 []>; 3768def SULD_2D_V2I16_CLAMP 3769 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3770 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3771 "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3772 []>; 3773def SULD_2D_V2I32_CLAMP 3774 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3775 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3776 "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3777 []>; 3778def SULD_2D_V2I64_CLAMP 3779 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3780 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3781 "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 3782 []>; 3783 3784def SULD_2D_ARRAY_V2I8_CLAMP 3785 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3786 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3787 "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, " 3788 "[$s, \\{$l, $x, $y, $y\\}];", 3789 []>; 3790def SULD_2D_ARRAY_V2I16_CLAMP 3791 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3792 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3793 "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, " 3794 "[$s, \\{$l, $x, $y, $y\\}];", 3795 []>; 3796def SULD_2D_ARRAY_V2I32_CLAMP 3797 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3798 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3799 "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, " 3800 "[$s, \\{$l, $x, $y, $y\\}];", 3801 []>; 3802def SULD_2D_ARRAY_V2I64_CLAMP 3803 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3804 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3805 "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, " 3806 "[$s, \\{$l, $x, $y, $y\\}];", 3807 []>; 3808 3809def SULD_3D_V2I8_CLAMP 3810 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3811 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3812 "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3813 []>; 3814def SULD_3D_V2I16_CLAMP 3815 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 3816 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3817 "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3818 []>; 3819def SULD_3D_V2I32_CLAMP 3820 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 3821 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3822 "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3823 []>; 3824def SULD_3D_V2I64_CLAMP 3825 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 3826 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3827 "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 3828 []>; 3829} 3830 3831let IsSuld = 3 in { 3832def SULD_1D_V4I8_CLAMP 3833 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3834 (ins Int64Regs:$s, Int32Regs:$x), 3835 "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3836 []>; 3837def SULD_1D_V4I16_CLAMP 3838 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3839 (ins Int64Regs:$s, Int32Regs:$x), 3840 "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3841 []>; 3842def SULD_1D_V4I32_CLAMP 3843 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3844 (ins Int64Regs:$s, Int32Regs:$x), 3845 "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 3846 []>; 3847 3848def SULD_1D_ARRAY_V4I8_CLAMP 3849 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3850 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3851 "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3852 "[$s, \\{$l, $x\\}];", 3853 []>; 3854def SULD_1D_ARRAY_V4I16_CLAMP 3855 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3856 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3857 "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3858 "[$s, \\{$l, $x\\}];", 3859 []>; 3860def SULD_1D_ARRAY_V4I32_CLAMP 3861 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3862 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3863 "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3864 "[$s, \\{$l, $x\\}];", 3865 []>; 3866 3867def SULD_2D_V4I8_CLAMP 3868 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3869 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3870 "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3871 []>; 3872def SULD_2D_V4I16_CLAMP 3873 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3874 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3875 "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3876 []>; 3877def SULD_2D_V4I32_CLAMP 3878 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3879 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3880 "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 3881 []>; 3882 3883def SULD_2D_ARRAY_V4I8_CLAMP 3884 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3885 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3886 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3887 "[$s, \\{$l, $x, $y, $y\\}];", 3888 []>; 3889def SULD_2D_ARRAY_V4I16_CLAMP 3890 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3891 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3892 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3893 "[$s, \\{$l, $x, $y, $y\\}];", 3894 []>; 3895def SULD_2D_ARRAY_V4I32_CLAMP 3896 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3897 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3898 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3899 "[$s, \\{$l, $x, $y, $y\\}];", 3900 []>; 3901 3902 3903def SULD_3D_V4I8_CLAMP 3904 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3905 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3906 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3907 "[$s, \\{$x, $y, $z, $z\\}];", 3908 []>; 3909def SULD_3D_V4I16_CLAMP 3910 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3911 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3912 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3913 "[$s, \\{$x, $y, $z, $z\\}];", 3914 []>; 3915def SULD_3D_V4I32_CLAMP 3916 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3917 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3918 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3919 "[$s, \\{$x, $y, $z, $z\\}];", 3920 []>; 3921} 3922 3923 3924// .trap variant 3925let IsSuld = 1 in { 3926def SULD_1D_I8_TRAP 3927 : NVPTXInst<(outs Int16Regs:$r), 3928 (ins Int64Regs:$s, Int32Regs:$x), 3929 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", 3930 []>; 3931def SULD_1D_I16_TRAP 3932 : NVPTXInst<(outs Int16Regs:$r), 3933 (ins Int64Regs:$s, Int32Regs:$x), 3934 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", 3935 []>; 3936def SULD_1D_I32_TRAP 3937 : NVPTXInst<(outs Int32Regs:$r), 3938 (ins Int64Regs:$s, Int32Regs:$x), 3939 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", 3940 []>; 3941def SULD_1D_I64_TRAP 3942 : NVPTXInst<(outs Int64Regs:$r), 3943 (ins Int64Regs:$s, Int32Regs:$x), 3944 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", 3945 []>; 3946 3947def SULD_1D_ARRAY_I8_TRAP 3948 : NVPTXInst<(outs Int16Regs:$r), 3949 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3950 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3951 []>; 3952def SULD_1D_ARRAY_I16_TRAP 3953 : NVPTXInst<(outs Int16Regs:$r), 3954 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3955 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3956 []>; 3957def SULD_1D_ARRAY_I32_TRAP 3958 : NVPTXInst<(outs Int32Regs:$r), 3959 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3960 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3961 []>; 3962def SULD_1D_ARRAY_I64_TRAP 3963 : NVPTXInst<(outs Int64Regs:$r), 3964 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 3965 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 3966 []>; 3967 3968def SULD_2D_I8_TRAP 3969 : NVPTXInst<(outs Int16Regs:$r), 3970 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3971 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 3972 []>; 3973def SULD_2D_I16_TRAP 3974 : NVPTXInst<(outs Int16Regs:$r), 3975 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3976 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 3977 []>; 3978def SULD_2D_I32_TRAP 3979 : NVPTXInst<(outs Int32Regs:$r), 3980 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3981 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 3982 []>; 3983def SULD_2D_I64_TRAP 3984 : NVPTXInst<(outs Int64Regs:$r), 3985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 3986 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 3987 []>; 3988 3989def SULD_2D_ARRAY_I8_TRAP 3990 : NVPTXInst<(outs Int16Regs:$r), 3991 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3992 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3993 []>; 3994def SULD_2D_ARRAY_I16_TRAP 3995 : NVPTXInst<(outs Int16Regs:$r), 3996 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3997 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 3998 []>; 3999def SULD_2D_ARRAY_I32_TRAP 4000 : NVPTXInst<(outs Int32Regs:$r), 4001 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4002 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4003 []>; 4004def SULD_2D_ARRAY_I64_TRAP 4005 : NVPTXInst<(outs Int64Regs:$r), 4006 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4007 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4008 []>; 4009 4010def SULD_3D_I8_TRAP 4011 : NVPTXInst<(outs Int16Regs:$r), 4012 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4013 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4014 []>; 4015def SULD_3D_I16_TRAP 4016 : NVPTXInst<(outs Int16Regs:$r), 4017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4018 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4019 []>; 4020def SULD_3D_I32_TRAP 4021 : NVPTXInst<(outs Int32Regs:$r), 4022 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4023 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4024 []>; 4025def SULD_3D_I64_TRAP 4026 : NVPTXInst<(outs Int64Regs:$r), 4027 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4028 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4029 []>; 4030} 4031 4032let IsSuld = 2 in { 4033def SULD_1D_V2I8_TRAP 4034 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4035 (ins Int64Regs:$s, Int32Regs:$x), 4036 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4037 []>; 4038def SULD_1D_V2I16_TRAP 4039 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4040 (ins Int64Regs:$s, Int32Regs:$x), 4041 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4042 []>; 4043def SULD_1D_V2I32_TRAP 4044 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4045 (ins Int64Regs:$s, Int32Regs:$x), 4046 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4047 []>; 4048def SULD_1D_V2I64_TRAP 4049 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4050 (ins Int64Regs:$s, Int32Regs:$x), 4051 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4052 []>; 4053 4054def SULD_1D_ARRAY_V2I8_TRAP 4055 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4056 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4057 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4058 []>; 4059def SULD_1D_ARRAY_V2I16_TRAP 4060 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4061 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4062 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4063 []>; 4064def SULD_1D_ARRAY_V2I32_TRAP 4065 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4066 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4067 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4068 []>; 4069def SULD_1D_ARRAY_V2I64_TRAP 4070 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4071 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4072 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4073 []>; 4074 4075def SULD_2D_V2I8_TRAP 4076 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4077 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4078 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4079 []>; 4080def SULD_2D_V2I16_TRAP 4081 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4082 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4083 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4084 []>; 4085def SULD_2D_V2I32_TRAP 4086 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4087 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4088 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4089 []>; 4090def SULD_2D_V2I64_TRAP 4091 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4092 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4093 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4094 []>; 4095 4096def SULD_2D_ARRAY_V2I8_TRAP 4097 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4098 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4099 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " 4100 "[$s, \\{$l, $x, $y, $y\\}];", 4101 []>; 4102def SULD_2D_ARRAY_V2I16_TRAP 4103 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4104 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4105 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " 4106 "[$s, \\{$l, $x, $y, $y\\}];", 4107 []>; 4108def SULD_2D_ARRAY_V2I32_TRAP 4109 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4110 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4111 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " 4112 "[$s, \\{$l, $x, $y, $y\\}];", 4113 []>; 4114def SULD_2D_ARRAY_V2I64_TRAP 4115 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4116 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4117 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " 4118 "[$s, \\{$l, $x, $y, $y\\}];", 4119 []>; 4120 4121def SULD_3D_V2I8_TRAP 4122 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4123 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4124 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4125 []>; 4126def SULD_3D_V2I16_TRAP 4127 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4128 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4129 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4130 []>; 4131def SULD_3D_V2I32_TRAP 4132 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4133 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4134 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4135 []>; 4136def SULD_3D_V2I64_TRAP 4137 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4138 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4139 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4140 []>; 4141} 4142 4143let IsSuld = 3 in { 4144def SULD_1D_V4I8_TRAP 4145 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4146 (ins Int64Regs:$s, Int32Regs:$x), 4147 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4148 []>; 4149def SULD_1D_V4I16_TRAP 4150 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4151 (ins Int64Regs:$s, Int32Regs:$x), 4152 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4153 []>; 4154def SULD_1D_V4I32_TRAP 4155 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4156 (ins Int64Regs:$s, Int32Regs:$x), 4157 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4158 []>; 4159 4160def SULD_1D_ARRAY_V4I8_TRAP 4161 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4162 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4163 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4164 "[$s, \\{$l, $x\\}];", 4165 []>; 4166def SULD_1D_ARRAY_V4I16_TRAP 4167 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4168 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4169 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4170 "[$s, \\{$l, $x\\}];", 4171 []>; 4172def SULD_1D_ARRAY_V4I32_TRAP 4173 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4174 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4175 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4176 "[$s, \\{$l, $x\\}];", 4177 []>; 4178 4179def SULD_2D_V4I8_TRAP 4180 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4182 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4183 []>; 4184def SULD_2D_V4I16_TRAP 4185 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4187 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4188 []>; 4189def SULD_2D_V4I32_TRAP 4190 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4192 "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4193 []>; 4194 4195def SULD_2D_ARRAY_V4I8_TRAP 4196 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4197 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4198 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4199 "[$s, \\{$l, $x, $y, $y\\}];", 4200 []>; 4201def SULD_2D_ARRAY_V4I16_TRAP 4202 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4203 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4204 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4205 "[$s, \\{$l, $x, $y, $y\\}];", 4206 []>; 4207def SULD_2D_ARRAY_V4I32_TRAP 4208 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4209 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4210 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4211 "[$s, \\{$l, $x, $y, $y\\}];", 4212 []>; 4213 4214 4215def SULD_3D_V4I8_TRAP 4216 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4217 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4218 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4219 "[$s, \\{$x, $y, $z, $z\\}];", 4220 []>; 4221def SULD_3D_V4I16_TRAP 4222 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4223 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4224 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4225 "[$s, \\{$x, $y, $z, $z\\}];", 4226 []>; 4227def SULD_3D_V4I32_TRAP 4228 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4229 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4230 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4231 "[$s, \\{$x, $y, $z, $z\\}];", 4232 []>; 4233} 4234 4235// .zero variant 4236let IsSuld = 1 in { 4237def SULD_1D_I8_ZERO 4238 : NVPTXInst<(outs Int16Regs:$r), 4239 (ins Int64Regs:$s, Int32Regs:$x), 4240 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", 4241 []>; 4242def SULD_1D_I16_ZERO 4243 : NVPTXInst<(outs Int16Regs:$r), 4244 (ins Int64Regs:$s, Int32Regs:$x), 4245 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", 4246 []>; 4247def SULD_1D_I32_ZERO 4248 : NVPTXInst<(outs Int32Regs:$r), 4249 (ins Int64Regs:$s, Int32Regs:$x), 4250 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", 4251 []>; 4252def SULD_1D_I64_ZERO 4253 : NVPTXInst<(outs Int64Regs:$r), 4254 (ins Int64Regs:$s, Int32Regs:$x), 4255 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", 4256 []>; 4257 4258def SULD_1D_ARRAY_I8_ZERO 4259 : NVPTXInst<(outs Int16Regs:$r), 4260 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4261 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4262 []>; 4263def SULD_1D_ARRAY_I16_ZERO 4264 : NVPTXInst<(outs Int16Regs:$r), 4265 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4266 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4267 []>; 4268def SULD_1D_ARRAY_I32_ZERO 4269 : NVPTXInst<(outs Int32Regs:$r), 4270 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4271 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4272 []>; 4273def SULD_1D_ARRAY_I64_ZERO 4274 : NVPTXInst<(outs Int64Regs:$r), 4275 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4276 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4277 []>; 4278 4279def SULD_2D_I8_ZERO 4280 : NVPTXInst<(outs Int16Regs:$r), 4281 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4282 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4283 []>; 4284def SULD_2D_I16_ZERO 4285 : NVPTXInst<(outs Int16Regs:$r), 4286 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4287 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4288 []>; 4289def SULD_2D_I32_ZERO 4290 : NVPTXInst<(outs Int32Regs:$r), 4291 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4292 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4293 []>; 4294def SULD_2D_I64_ZERO 4295 : NVPTXInst<(outs Int64Regs:$r), 4296 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4297 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4298 []>; 4299 4300def SULD_2D_ARRAY_I8_ZERO 4301 : NVPTXInst<(outs Int16Regs:$r), 4302 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4303 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4304 []>; 4305def SULD_2D_ARRAY_I16_ZERO 4306 : NVPTXInst<(outs Int16Regs:$r), 4307 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4308 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4309 []>; 4310def SULD_2D_ARRAY_I32_ZERO 4311 : NVPTXInst<(outs Int32Regs:$r), 4312 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4313 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4314 []>; 4315def SULD_2D_ARRAY_I64_ZERO 4316 : NVPTXInst<(outs Int64Regs:$r), 4317 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4318 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4319 []>; 4320 4321def SULD_3D_I8_ZERO 4322 : NVPTXInst<(outs Int16Regs:$r), 4323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4324 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4325 []>; 4326def SULD_3D_I16_ZERO 4327 : NVPTXInst<(outs Int16Regs:$r), 4328 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4329 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4330 []>; 4331def SULD_3D_I32_ZERO 4332 : NVPTXInst<(outs Int32Regs:$r), 4333 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4334 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4335 []>; 4336def SULD_3D_I64_ZERO 4337 : NVPTXInst<(outs Int64Regs:$r), 4338 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4339 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4340 []>; 4341} 4342 4343let IsSuld = 2 in { 4344def SULD_1D_V2I8_ZERO 4345 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4346 (ins Int64Regs:$s, Int32Regs:$x), 4347 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4348 []>; 4349def SULD_1D_V2I16_ZERO 4350 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4351 (ins Int64Regs:$s, Int32Regs:$x), 4352 "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4353 []>; 4354def SULD_1D_V2I32_ZERO 4355 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4356 (ins Int64Regs:$s, Int32Regs:$x), 4357 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4358 []>; 4359def SULD_1D_V2I64_ZERO 4360 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4361 (ins Int64Regs:$s, Int32Regs:$x), 4362 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4363 []>; 4364 4365def SULD_1D_ARRAY_V2I8_ZERO 4366 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4367 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4368 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4369 []>; 4370def SULD_1D_ARRAY_V2I16_ZERO 4371 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4372 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4373 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4374 []>; 4375def SULD_1D_ARRAY_V2I32_ZERO 4376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4377 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4378 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4379 []>; 4380def SULD_1D_ARRAY_V2I64_ZERO 4381 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4382 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4383 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4384 []>; 4385 4386def SULD_2D_V2I8_ZERO 4387 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4388 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4389 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4390 []>; 4391def SULD_2D_V2I16_ZERO 4392 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4393 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4394 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4395 []>; 4396def SULD_2D_V2I32_ZERO 4397 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4398 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4399 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4400 []>; 4401def SULD_2D_V2I64_ZERO 4402 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4403 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4404 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4405 []>; 4406 4407def SULD_2D_ARRAY_V2I8_ZERO 4408 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4409 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4410 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " 4411 "[$s, \\{$l, $x, $y, $y\\}];", 4412 []>; 4413def SULD_2D_ARRAY_V2I16_ZERO 4414 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4415 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4416 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " 4417 "[$s, \\{$l, $x, $y, $y\\}];", 4418 []>; 4419def SULD_2D_ARRAY_V2I32_ZERO 4420 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4421 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4422 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " 4423 "[$s, \\{$l, $x, $y, $y\\}];", 4424 []>; 4425def SULD_2D_ARRAY_V2I64_ZERO 4426 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4427 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4428 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " 4429 "[$s, \\{$l, $x, $y, $y\\}];", 4430 []>; 4431 4432def SULD_3D_V2I8_ZERO 4433 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4434 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4435 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4436 []>; 4437def SULD_3D_V2I16_ZERO 4438 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4439 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4440 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4441 []>; 4442def SULD_3D_V2I32_ZERO 4443 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4445 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4446 []>; 4447def SULD_3D_V2I64_ZERO 4448 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4449 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4450 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4451 []>; 4452} 4453 4454let IsSuld = 3 in { 4455def SULD_1D_V4I8_ZERO 4456 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4457 (ins Int64Regs:$s, Int32Regs:$x), 4458 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4459 []>; 4460def SULD_1D_V4I16_ZERO 4461 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4462 (ins Int64Regs:$s, Int32Regs:$x), 4463 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4464 []>; 4465def SULD_1D_V4I32_ZERO 4466 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4467 (ins Int64Regs:$s, Int32Regs:$x), 4468 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4469 []>; 4470 4471def SULD_1D_ARRAY_V4I8_ZERO 4472 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4473 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4474 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4475 "[$s, \\{$l, $x\\}];", 4476 []>; 4477def SULD_1D_ARRAY_V4I16_ZERO 4478 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4479 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4480 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4481 "[$s, \\{$l, $x\\}];", 4482 []>; 4483def SULD_1D_ARRAY_V4I32_ZERO 4484 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4485 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4486 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4487 "[$s, \\{$l, $x\\}];", 4488 []>; 4489 4490def SULD_2D_V4I8_ZERO 4491 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4492 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4493 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4494 []>; 4495def SULD_2D_V4I16_ZERO 4496 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4497 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4498 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4499 []>; 4500def SULD_2D_V4I32_ZERO 4501 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4503 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4504 []>; 4505 4506def SULD_2D_ARRAY_V4I8_ZERO 4507 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4508 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4509 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4510 "[$s, \\{$l, $x, $y, $y\\}];", 4511 []>; 4512def SULD_2D_ARRAY_V4I16_ZERO 4513 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4514 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4515 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4516 "[$s, \\{$l, $x, $y, $y\\}];", 4517 []>; 4518def SULD_2D_ARRAY_V4I32_ZERO 4519 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4520 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4521 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4522 "[$s, \\{$l, $x, $y, $y\\}];", 4523 []>; 4524 4525 4526def SULD_3D_V4I8_ZERO 4527 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4528 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4529 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4530 "[$s, \\{$x, $y, $z, $z\\}];", 4531 []>; 4532def SULD_3D_V4I16_ZERO 4533 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4534 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4535 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4536 "[$s, \\{$x, $y, $z, $z\\}];", 4537 []>; 4538def SULD_3D_V4I32_ZERO 4539 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4540 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4541 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4542 "[$s, \\{$x, $y, $z, $z\\}];", 4543 []>; 4544} 4545 4546//----------------------------------- 4547// Texture Query Intrinsics 4548//----------------------------------- 4549 4550let IsSurfTexQuery = 1 in { 4551def TXQ_CHANNEL_ORDER 4552 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4553 "txq.channel_order.b32 \t$d, [$a];", 4554 []>; 4555def TXQ_CHANNEL_DATA_TYPE 4556 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4557 "txq.channel_data_type.b32 \t$d, [$a];", 4558 []>; 4559def TXQ_WIDTH 4560 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4561 "txq.width.b32 \t$d, [$a];", 4562 []>; 4563def TXQ_HEIGHT 4564 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4565 "txq.height.b32 \t$d, [$a];", 4566 []>; 4567def TXQ_DEPTH 4568 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4569 "txq.depth.b32 \t$d, [$a];", 4570 []>; 4571def TXQ_ARRAY_SIZE 4572 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4573 "txq.array_size.b32 \t$d, [$a];", 4574 []>; 4575def TXQ_NUM_SAMPLES 4576 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4577 "txq.num_samples.b32 \t$d, [$a];", 4578 []>; 4579def TXQ_NUM_MIPMAP_LEVELS 4580 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4581 "txq.num_mipmap_levels.b32 \t$d, [$a];", 4582 []>; 4583} 4584 4585def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), 4586 (TXQ_CHANNEL_ORDER Int64Regs:$a)>; 4587def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), 4588 (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; 4589def : Pat<(int_nvvm_txq_width Int64Regs:$a), 4590 (TXQ_WIDTH Int64Regs:$a)>; 4591def : Pat<(int_nvvm_txq_height Int64Regs:$a), 4592 (TXQ_HEIGHT Int64Regs:$a)>; 4593def : Pat<(int_nvvm_txq_depth Int64Regs:$a), 4594 (TXQ_DEPTH Int64Regs:$a)>; 4595def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), 4596 (TXQ_ARRAY_SIZE Int64Regs:$a)>; 4597def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), 4598 (TXQ_NUM_SAMPLES Int64Regs:$a)>; 4599def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), 4600 (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; 4601 4602 4603//----------------------------------- 4604// Surface Query Intrinsics 4605//----------------------------------- 4606 4607let IsSurfTexQuery = 1 in { 4608def SUQ_CHANNEL_ORDER 4609 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4610 "suq.channel_order.b32 \t$d, [$a];", 4611 []>; 4612def SUQ_CHANNEL_DATA_TYPE 4613 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4614 "suq.channel_data_type.b32 \t$d, [$a];", 4615 []>; 4616def SUQ_WIDTH 4617 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4618 "suq.width.b32 \t$d, [$a];", 4619 []>; 4620def SUQ_HEIGHT 4621 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4622 "suq.height.b32 \t$d, [$a];", 4623 []>; 4624def SUQ_DEPTH 4625 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4626 "suq.depth.b32 \t$d, [$a];", 4627 []>; 4628def SUQ_ARRAY_SIZE 4629 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), 4630 "suq.array_size.b32 \t$d, [$a];", 4631 []>; 4632} 4633 4634def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), 4635 (SUQ_CHANNEL_ORDER Int64Regs:$a)>; 4636def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), 4637 (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; 4638def : Pat<(int_nvvm_suq_width Int64Regs:$a), 4639 (SUQ_WIDTH Int64Regs:$a)>; 4640def : Pat<(int_nvvm_suq_height Int64Regs:$a), 4641 (SUQ_HEIGHT Int64Regs:$a)>; 4642def : Pat<(int_nvvm_suq_depth Int64Regs:$a), 4643 (SUQ_DEPTH Int64Regs:$a)>; 4644def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), 4645 (SUQ_ARRAY_SIZE Int64Regs:$a)>; 4646 4647 4648//===- Handle Query -------------------------------------------------------===// 4649 4650// TODO: These intrinsics are not yet finalized, pending PTX ISA design work 4651def ISTYPEP_SAMPLER 4652 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4653 "istypep.samplerref \t$d, $a;", 4654 [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; 4655def ISTYPEP_SURFACE 4656 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4657 "istypep.surfref \t$d, $a;", 4658 [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; 4659def ISTYPEP_TEXTURE 4660 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 4661 "istypep.texref \t$d, $a;", 4662 [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; 4663 4664//===- Surface Stores -----------------------------------------------------===// 4665 4666let IsSust = 1 in { 4667// Unformatted 4668// .clamp variant 4669def SUST_B_1D_B8_CLAMP 4670 : NVPTXInst<(outs), 4671 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4672 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4673 []>; 4674def SUST_B_1D_B16_CLAMP 4675 : NVPTXInst<(outs), 4676 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 4677 "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4678 []>; 4679def SUST_B_1D_B32_CLAMP 4680 : NVPTXInst<(outs), 4681 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 4682 "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4683 []>; 4684def SUST_B_1D_B64_CLAMP 4685 : NVPTXInst<(outs), 4686 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 4687 "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};", 4688 []>; 4689def SUST_B_1D_V2B8_CLAMP 4690 : NVPTXInst<(outs), 4691 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4692 "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4693 []>; 4694def SUST_B_1D_V2B16_CLAMP 4695 : NVPTXInst<(outs), 4696 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 4697 "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4698 []>; 4699def SUST_B_1D_V2B32_CLAMP 4700 : NVPTXInst<(outs), 4701 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 4702 "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4703 []>; 4704def SUST_B_1D_V2B64_CLAMP 4705 : NVPTXInst<(outs), 4706 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 4707 "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};", 4708 []>; 4709def SUST_B_1D_V4B8_CLAMP 4710 : NVPTXInst<(outs), 4711 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4712 Int16Regs:$b, Int16Regs:$a), 4713 "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4714 []>; 4715def SUST_B_1D_V4B16_CLAMP 4716 : NVPTXInst<(outs), 4717 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 4718 Int16Regs:$b, Int16Regs:$a), 4719 "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4720 []>; 4721def SUST_B_1D_V4B32_CLAMP 4722 : NVPTXInst<(outs), 4723 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 4724 Int32Regs:$b, Int32Regs:$a), 4725 "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 4726 []>; 4727 4728 4729def SUST_B_1D_ARRAY_B8_CLAMP 4730 : NVPTXInst<(outs), 4731 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4732 "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4733 []>; 4734def SUST_B_1D_ARRAY_B16_CLAMP 4735 : NVPTXInst<(outs), 4736 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 4737 "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4738 []>; 4739def SUST_B_1D_ARRAY_B32_CLAMP 4740 : NVPTXInst<(outs), 4741 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 4742 "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4743 []>; 4744def SUST_B_1D_ARRAY_B64_CLAMP 4745 : NVPTXInst<(outs), 4746 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 4747 "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};", 4748 []>; 4749def SUST_B_1D_ARRAY_V2B8_CLAMP 4750 : NVPTXInst<(outs), 4751 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4752 Int16Regs:$g), 4753 "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4754 []>; 4755def SUST_B_1D_ARRAY_V2B16_CLAMP 4756 : NVPTXInst<(outs), 4757 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4758 Int16Regs:$g), 4759 "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4760 []>; 4761def SUST_B_1D_ARRAY_V2B32_CLAMP 4762 : NVPTXInst<(outs), 4763 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4764 Int32Regs:$g), 4765 "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4766 []>; 4767def SUST_B_1D_ARRAY_V2B64_CLAMP 4768 : NVPTXInst<(outs), 4769 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 4770 Int64Regs:$g), 4771 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4772 []>; 4773def SUST_B_1D_ARRAY_V4B8_CLAMP 4774 : NVPTXInst<(outs), 4775 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4776 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4777 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " 4778 "\\{$r, $g, $b, $a\\};", 4779 []>; 4780def SUST_B_1D_ARRAY_V4B16_CLAMP 4781 : NVPTXInst<(outs), 4782 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4783 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4784 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " 4785 "\\{$r, $g, $b, $a\\};", 4786 []>; 4787def SUST_B_1D_ARRAY_V4B32_CLAMP 4788 : NVPTXInst<(outs), 4789 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4790 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4791 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " 4792 "\\{$r, $g, $b, $a\\};", 4793 []>; 4794 4795 4796def SUST_B_2D_B8_CLAMP 4797 : NVPTXInst<(outs), 4798 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4799 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4800 []>; 4801def SUST_B_2D_B16_CLAMP 4802 : NVPTXInst<(outs), 4803 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4804 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4805 []>; 4806def SUST_B_2D_B32_CLAMP 4807 : NVPTXInst<(outs), 4808 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4809 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4810 []>; 4811def SUST_B_2D_B64_CLAMP 4812 : NVPTXInst<(outs), 4813 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 4814 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4815 []>; 4816def SUST_B_2D_V2B8_CLAMP 4817 : NVPTXInst<(outs), 4818 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4819 Int16Regs:$g), 4820 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4821 []>; 4822def SUST_B_2D_V2B16_CLAMP 4823 : NVPTXInst<(outs), 4824 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4825 Int16Regs:$g), 4826 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4827 []>; 4828def SUST_B_2D_V2B32_CLAMP 4829 : NVPTXInst<(outs), 4830 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4831 Int32Regs:$g), 4832 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4833 []>; 4834def SUST_B_2D_V2B64_CLAMP 4835 : NVPTXInst<(outs), 4836 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 4837 Int64Regs:$g), 4838 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4839 []>; 4840def SUST_B_2D_V4B8_CLAMP 4841 : NVPTXInst<(outs), 4842 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4843 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4844 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " 4845 "\\{$r, $g, $b, $a\\};", 4846 []>; 4847def SUST_B_2D_V4B16_CLAMP 4848 : NVPTXInst<(outs), 4849 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4850 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4851 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " 4852 "\\{$r, $g, $b, $a\\};", 4853 []>; 4854def SUST_B_2D_V4B32_CLAMP 4855 : NVPTXInst<(outs), 4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4857 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4858 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " 4859 "\\{$r, $g, $b, $a\\};", 4860 []>; 4861 4862 4863def SUST_B_2D_ARRAY_B8_CLAMP 4864 : NVPTXInst<(outs), 4865 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4866 Int16Regs:$r), 4867 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4868 []>; 4869def SUST_B_2D_ARRAY_B16_CLAMP 4870 : NVPTXInst<(outs), 4871 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4872 Int16Regs:$r), 4873 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4874 []>; 4875def SUST_B_2D_ARRAY_B32_CLAMP 4876 : NVPTXInst<(outs), 4877 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4878 Int32Regs:$r), 4879 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4880 []>; 4881def SUST_B_2D_ARRAY_B64_CLAMP 4882 : NVPTXInst<(outs), 4883 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4884 Int64Regs:$r), 4885 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4886 []>; 4887def SUST_B_2D_ARRAY_V2B8_CLAMP 4888 : NVPTXInst<(outs), 4889 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4890 Int16Regs:$r, Int16Regs:$g), 4891 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4892 "\\{$r, $g\\};", 4893 []>; 4894def SUST_B_2D_ARRAY_V2B16_CLAMP 4895 : NVPTXInst<(outs), 4896 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4897 Int16Regs:$r, Int16Regs:$g), 4898 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4899 "\\{$r, $g\\};", 4900 []>; 4901def SUST_B_2D_ARRAY_V2B32_CLAMP 4902 : NVPTXInst<(outs), 4903 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4904 Int32Regs:$r, Int32Regs:$g), 4905 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4906 "\\{$r, $g\\};", 4907 []>; 4908def SUST_B_2D_ARRAY_V2B64_CLAMP 4909 : NVPTXInst<(outs), 4910 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4911 Int64Regs:$r, Int64Regs:$g), 4912 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4913 "\\{$r, $g\\};", 4914 []>; 4915def SUST_B_2D_ARRAY_V4B8_CLAMP 4916 : NVPTXInst<(outs), 4917 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4918 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4919 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4920 "\\{$r, $g, $b, $a\\};", 4921 []>; 4922def SUST_B_2D_ARRAY_V4B16_CLAMP 4923 : NVPTXInst<(outs), 4924 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4925 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4926 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4927 "\\{$r, $g, $b, $a\\};", 4928 []>; 4929def SUST_B_2D_ARRAY_V4B32_CLAMP 4930 : NVPTXInst<(outs), 4931 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4932 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4933 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4934 "\\{$r, $g, $b, $a\\};", 4935 []>; 4936 4937 4938def SUST_B_3D_B8_CLAMP 4939 : NVPTXInst<(outs), 4940 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4941 Int16Regs:$r), 4942 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4943 []>; 4944def SUST_B_3D_B16_CLAMP 4945 : NVPTXInst<(outs), 4946 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4947 Int16Regs:$r), 4948 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4949 []>; 4950def SUST_B_3D_B32_CLAMP 4951 : NVPTXInst<(outs), 4952 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4953 Int32Regs:$r), 4954 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4955 []>; 4956def SUST_B_3D_B64_CLAMP 4957 : NVPTXInst<(outs), 4958 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4959 Int64Regs:$r), 4960 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4961 []>; 4962def SUST_B_3D_V2B8_CLAMP 4963 : NVPTXInst<(outs), 4964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4965 Int16Regs:$r, Int16Regs:$g), 4966 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 4967 "\\{$r, $g\\};", 4968 []>; 4969def SUST_B_3D_V2B16_CLAMP 4970 : NVPTXInst<(outs), 4971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4972 Int16Regs:$r, Int16Regs:$g), 4973 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 4974 "\\{$r, $g\\};", 4975 []>; 4976def SUST_B_3D_V2B32_CLAMP 4977 : NVPTXInst<(outs), 4978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4979 Int32Regs:$r, Int32Regs:$g), 4980 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 4981 "\\{$r, $g\\};", 4982 []>; 4983def SUST_B_3D_V2B64_CLAMP 4984 : NVPTXInst<(outs), 4985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4986 Int64Regs:$r, Int64Regs:$g), 4987 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 4988 "\\{$r, $g\\};", 4989 []>; 4990def SUST_B_3D_V4B8_CLAMP 4991 : NVPTXInst<(outs), 4992 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4993 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4994 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 4995 "\\{$r, $g, $b, $a\\};", 4996 []>; 4997def SUST_B_3D_V4B16_CLAMP 4998 : NVPTXInst<(outs), 4999 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5000 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5001 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5002 "\\{$r, $g, $b, $a\\};", 5003 []>; 5004def SUST_B_3D_V4B32_CLAMP 5005 : NVPTXInst<(outs), 5006 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5007 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5008 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5009 "\\{$r, $g, $b, $a\\};", 5010 []>; 5011 5012 5013// .trap variant 5014def SUST_B_1D_B8_TRAP 5015 : NVPTXInst<(outs), 5016 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5017 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5018 []>; 5019def SUST_B_1D_B16_TRAP 5020 : NVPTXInst<(outs), 5021 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5022 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5023 []>; 5024def SUST_B_1D_B32_TRAP 5025 : NVPTXInst<(outs), 5026 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5027 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5028 []>; 5029def SUST_B_1D_B64_TRAP 5030 : NVPTXInst<(outs), 5031 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5032 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", 5033 []>; 5034def SUST_B_1D_V2B8_TRAP 5035 : NVPTXInst<(outs), 5036 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5037 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5038 []>; 5039def SUST_B_1D_V2B16_TRAP 5040 : NVPTXInst<(outs), 5041 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5042 "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5043 []>; 5044def SUST_B_1D_V2B32_TRAP 5045 : NVPTXInst<(outs), 5046 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5047 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5048 []>; 5049def SUST_B_1D_V2B64_TRAP 5050 : NVPTXInst<(outs), 5051 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5052 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5053 []>; 5054def SUST_B_1D_V4B8_TRAP 5055 : NVPTXInst<(outs), 5056 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5057 Int16Regs:$b, Int16Regs:$a), 5058 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5059 []>; 5060def SUST_B_1D_V4B16_TRAP 5061 : NVPTXInst<(outs), 5062 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5063 Int16Regs:$b, Int16Regs:$a), 5064 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5065 []>; 5066def SUST_B_1D_V4B32_TRAP 5067 : NVPTXInst<(outs), 5068 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5069 Int32Regs:$b, Int32Regs:$a), 5070 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5071 []>; 5072 5073 5074def SUST_B_1D_ARRAY_B8_TRAP 5075 : NVPTXInst<(outs), 5076 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5077 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5078 []>; 5079def SUST_B_1D_ARRAY_B16_TRAP 5080 : NVPTXInst<(outs), 5081 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5082 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5083 []>; 5084def SUST_B_1D_ARRAY_B32_TRAP 5085 : NVPTXInst<(outs), 5086 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5087 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5088 []>; 5089def SUST_B_1D_ARRAY_B64_TRAP 5090 : NVPTXInst<(outs), 5091 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5092 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5093 []>; 5094def SUST_B_1D_ARRAY_V2B8_TRAP 5095 : NVPTXInst<(outs), 5096 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5097 Int16Regs:$g), 5098 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5099 []>; 5100def SUST_B_1D_ARRAY_V2B16_TRAP 5101 : NVPTXInst<(outs), 5102 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5103 Int16Regs:$g), 5104 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5105 []>; 5106def SUST_B_1D_ARRAY_V2B32_TRAP 5107 : NVPTXInst<(outs), 5108 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5109 Int32Regs:$g), 5110 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5111 []>; 5112def SUST_B_1D_ARRAY_V2B64_TRAP 5113 : NVPTXInst<(outs), 5114 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5115 Int64Regs:$g), 5116 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5117 []>; 5118def SUST_B_1D_ARRAY_V4B8_TRAP 5119 : NVPTXInst<(outs), 5120 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5121 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5122 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5123 "\\{$r, $g, $b, $a\\};", 5124 []>; 5125def SUST_B_1D_ARRAY_V4B16_TRAP 5126 : NVPTXInst<(outs), 5127 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5128 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5129 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5130 "\\{$r, $g, $b, $a\\};", 5131 []>; 5132def SUST_B_1D_ARRAY_V4B32_TRAP 5133 : NVPTXInst<(outs), 5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5135 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5136 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5137 "\\{$r, $g, $b, $a\\};", 5138 []>; 5139 5140 5141def SUST_B_2D_B8_TRAP 5142 : NVPTXInst<(outs), 5143 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5144 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5145 []>; 5146def SUST_B_2D_B16_TRAP 5147 : NVPTXInst<(outs), 5148 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5149 "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5150 []>; 5151def SUST_B_2D_B32_TRAP 5152 : NVPTXInst<(outs), 5153 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5154 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5155 []>; 5156def SUST_B_2D_B64_TRAP 5157 : NVPTXInst<(outs), 5158 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5159 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5160 []>; 5161def SUST_B_2D_V2B8_TRAP 5162 : NVPTXInst<(outs), 5163 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5164 Int16Regs:$g), 5165 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5166 []>; 5167def SUST_B_2D_V2B16_TRAP 5168 : NVPTXInst<(outs), 5169 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5170 Int16Regs:$g), 5171 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5172 []>; 5173def SUST_B_2D_V2B32_TRAP 5174 : NVPTXInst<(outs), 5175 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5176 Int32Regs:$g), 5177 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5178 []>; 5179def SUST_B_2D_V2B64_TRAP 5180 : NVPTXInst<(outs), 5181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5182 Int64Regs:$g), 5183 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5184 []>; 5185def SUST_B_2D_V4B8_TRAP 5186 : NVPTXInst<(outs), 5187 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5188 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5189 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5190 "\\{$r, $g, $b, $a\\};", 5191 []>; 5192def SUST_B_2D_V4B16_TRAP 5193 : NVPTXInst<(outs), 5194 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5195 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5196 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5197 "\\{$r, $g, $b, $a\\};", 5198 []>; 5199def SUST_B_2D_V4B32_TRAP 5200 : NVPTXInst<(outs), 5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5202 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5203 "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5204 "\\{$r, $g, $b, $a\\};", 5205 []>; 5206 5207 5208def SUST_B_2D_ARRAY_B8_TRAP 5209 : NVPTXInst<(outs), 5210 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5211 Int16Regs:$r), 5212 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5213 []>; 5214def SUST_B_2D_ARRAY_B16_TRAP 5215 : NVPTXInst<(outs), 5216 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5217 Int16Regs:$r), 5218 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5219 []>; 5220def SUST_B_2D_ARRAY_B32_TRAP 5221 : NVPTXInst<(outs), 5222 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5223 Int32Regs:$r), 5224 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5225 []>; 5226def SUST_B_2D_ARRAY_B64_TRAP 5227 : NVPTXInst<(outs), 5228 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5229 Int64Regs:$r), 5230 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5231 []>; 5232def SUST_B_2D_ARRAY_V2B8_TRAP 5233 : NVPTXInst<(outs), 5234 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5235 Int16Regs:$r, Int16Regs:$g), 5236 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5237 "\\{$r, $g\\};", 5238 []>; 5239def SUST_B_2D_ARRAY_V2B16_TRAP 5240 : NVPTXInst<(outs), 5241 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5242 Int16Regs:$r, Int16Regs:$g), 5243 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5244 "\\{$r, $g\\};", 5245 []>; 5246def SUST_B_2D_ARRAY_V2B32_TRAP 5247 : NVPTXInst<(outs), 5248 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5249 Int32Regs:$r, Int32Regs:$g), 5250 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5251 "\\{$r, $g\\};", 5252 []>; 5253def SUST_B_2D_ARRAY_V2B64_TRAP 5254 : NVPTXInst<(outs), 5255 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5256 Int64Regs:$r, Int64Regs:$g), 5257 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5258 "\\{$r, $g\\};", 5259 []>; 5260def SUST_B_2D_ARRAY_V4B8_TRAP 5261 : NVPTXInst<(outs), 5262 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5263 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5264 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5265 "\\{$r, $g, $b, $a\\};", 5266 []>; 5267def SUST_B_2D_ARRAY_V4B16_TRAP 5268 : NVPTXInst<(outs), 5269 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5270 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5271 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5272 "\\{$r, $g, $b, $a\\};", 5273 []>; 5274def SUST_B_2D_ARRAY_V4B32_TRAP 5275 : NVPTXInst<(outs), 5276 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5277 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5278 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5279 "\\{$r, $g, $b, $a\\};", 5280 []>; 5281 5282 5283def SUST_B_3D_B8_TRAP 5284 : NVPTXInst<(outs), 5285 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5286 Int16Regs:$r), 5287 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5288 []>; 5289def SUST_B_3D_B16_TRAP 5290 : NVPTXInst<(outs), 5291 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5292 Int16Regs:$r), 5293 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5294 []>; 5295def SUST_B_3D_B32_TRAP 5296 : NVPTXInst<(outs), 5297 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5298 Int32Regs:$r), 5299 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5300 []>; 5301def SUST_B_3D_B64_TRAP 5302 : NVPTXInst<(outs), 5303 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5304 Int64Regs:$r), 5305 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5306 []>; 5307def SUST_B_3D_V2B8_TRAP 5308 : NVPTXInst<(outs), 5309 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5310 Int16Regs:$r, Int16Regs:$g), 5311 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5312 "\\{$r, $g\\};", 5313 []>; 5314def SUST_B_3D_V2B16_TRAP 5315 : NVPTXInst<(outs), 5316 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5317 Int16Regs:$r, Int16Regs:$g), 5318 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5319 "\\{$r, $g\\};", 5320 []>; 5321def SUST_B_3D_V2B32_TRAP 5322 : NVPTXInst<(outs), 5323 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5324 Int32Regs:$r, Int32Regs:$g), 5325 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5326 "\\{$r, $g\\};", 5327 []>; 5328def SUST_B_3D_V2B64_TRAP 5329 : NVPTXInst<(outs), 5330 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5331 Int64Regs:$r, Int64Regs:$g), 5332 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5333 "\\{$r, $g\\};", 5334 []>; 5335def SUST_B_3D_V4B8_TRAP 5336 : NVPTXInst<(outs), 5337 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5338 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5339 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5340 "\\{$r, $g, $b, $a\\};", 5341 []>; 5342def SUST_B_3D_V4B16_TRAP 5343 : NVPTXInst<(outs), 5344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5345 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5346 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5347 "\\{$r, $g, $b, $a\\};", 5348 []>; 5349def SUST_B_3D_V4B32_TRAP 5350 : NVPTXInst<(outs), 5351 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5352 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5353 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5354 "\\{$r, $g, $b, $a\\};", 5355 []>; 5356 5357 5358// .zero variant 5359def SUST_B_1D_B8_ZERO 5360 : NVPTXInst<(outs), 5361 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5362 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", 5363 []>; 5364def SUST_B_1D_B16_ZERO 5365 : NVPTXInst<(outs), 5366 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5367 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", 5368 []>; 5369def SUST_B_1D_B32_ZERO 5370 : NVPTXInst<(outs), 5371 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5372 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", 5373 []>; 5374def SUST_B_1D_B64_ZERO 5375 : NVPTXInst<(outs), 5376 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5377 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", 5378 []>; 5379def SUST_B_1D_V2B8_ZERO 5380 : NVPTXInst<(outs), 5381 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5382 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5383 []>; 5384def SUST_B_1D_V2B16_ZERO 5385 : NVPTXInst<(outs), 5386 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5387 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5388 []>; 5389def SUST_B_1D_V2B32_ZERO 5390 : NVPTXInst<(outs), 5391 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5392 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5393 []>; 5394def SUST_B_1D_V2B64_ZERO 5395 : NVPTXInst<(outs), 5396 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5397 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5398 []>; 5399def SUST_B_1D_V4B8_ZERO 5400 : NVPTXInst<(outs), 5401 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5402 Int16Regs:$b, Int16Regs:$a), 5403 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5404 []>; 5405def SUST_B_1D_V4B16_ZERO 5406 : NVPTXInst<(outs), 5407 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5408 Int16Regs:$b, Int16Regs:$a), 5409 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5410 []>; 5411def SUST_B_1D_V4B32_ZERO 5412 : NVPTXInst<(outs), 5413 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5414 Int32Regs:$b, Int32Regs:$a), 5415 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5416 []>; 5417 5418 5419def SUST_B_1D_ARRAY_B8_ZERO 5420 : NVPTXInst<(outs), 5421 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5422 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5423 []>; 5424def SUST_B_1D_ARRAY_B16_ZERO 5425 : NVPTXInst<(outs), 5426 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5427 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5428 []>; 5429def SUST_B_1D_ARRAY_B32_ZERO 5430 : NVPTXInst<(outs), 5431 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5432 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5433 []>; 5434def SUST_B_1D_ARRAY_B64_ZERO 5435 : NVPTXInst<(outs), 5436 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5437 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5438 []>; 5439def SUST_B_1D_ARRAY_V2B8_ZERO 5440 : NVPTXInst<(outs), 5441 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5442 Int16Regs:$g), 5443 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5444 []>; 5445def SUST_B_1D_ARRAY_V2B16_ZERO 5446 : NVPTXInst<(outs), 5447 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5448 Int16Regs:$g), 5449 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5450 []>; 5451def SUST_B_1D_ARRAY_V2B32_ZERO 5452 : NVPTXInst<(outs), 5453 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5454 Int32Regs:$g), 5455 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5456 []>; 5457def SUST_B_1D_ARRAY_V2B64_ZERO 5458 : NVPTXInst<(outs), 5459 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5460 Int64Regs:$g), 5461 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5462 []>; 5463def SUST_B_1D_ARRAY_V4B8_ZERO 5464 : NVPTXInst<(outs), 5465 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5466 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5467 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " 5468 "\\{$r, $g, $b, $a\\};", 5469 []>; 5470def SUST_B_1D_ARRAY_V4B16_ZERO 5471 : NVPTXInst<(outs), 5472 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5473 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5474 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " 5475 "\\{$r, $g, $b, $a\\};", 5476 []>; 5477def SUST_B_1D_ARRAY_V4B32_ZERO 5478 : NVPTXInst<(outs), 5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5480 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5481 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " 5482 "\\{$r, $g, $b, $a\\};", 5483 []>; 5484 5485 5486def SUST_B_2D_B8_ZERO 5487 : NVPTXInst<(outs), 5488 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5489 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5490 []>; 5491def SUST_B_2D_B16_ZERO 5492 : NVPTXInst<(outs), 5493 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5494 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5495 []>; 5496def SUST_B_2D_B32_ZERO 5497 : NVPTXInst<(outs), 5498 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5499 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5500 []>; 5501def SUST_B_2D_B64_ZERO 5502 : NVPTXInst<(outs), 5503 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5504 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5505 []>; 5506def SUST_B_2D_V2B8_ZERO 5507 : NVPTXInst<(outs), 5508 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5509 Int16Regs:$g), 5510 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5511 []>; 5512def SUST_B_2D_V2B16_ZERO 5513 : NVPTXInst<(outs), 5514 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5515 Int16Regs:$g), 5516 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5517 []>; 5518def SUST_B_2D_V2B32_ZERO 5519 : NVPTXInst<(outs), 5520 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5521 Int32Regs:$g), 5522 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5523 []>; 5524def SUST_B_2D_V2B64_ZERO 5525 : NVPTXInst<(outs), 5526 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5527 Int64Regs:$g), 5528 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5529 []>; 5530def SUST_B_2D_V4B8_ZERO 5531 : NVPTXInst<(outs), 5532 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5533 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5534 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " 5535 "\\{$r, $g, $b, $a\\};", 5536 []>; 5537def SUST_B_2D_V4B16_ZERO 5538 : NVPTXInst<(outs), 5539 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5540 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5541 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " 5542 "\\{$r, $g, $b, $a\\};", 5543 []>; 5544def SUST_B_2D_V4B32_ZERO 5545 : NVPTXInst<(outs), 5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5547 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5548 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " 5549 "\\{$r, $g, $b, $a\\};", 5550 []>; 5551 5552 5553def SUST_B_2D_ARRAY_B8_ZERO 5554 : NVPTXInst<(outs), 5555 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5556 Int16Regs:$r), 5557 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5558 []>; 5559def SUST_B_2D_ARRAY_B16_ZERO 5560 : NVPTXInst<(outs), 5561 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5562 Int16Regs:$r), 5563 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5564 []>; 5565def SUST_B_2D_ARRAY_B32_ZERO 5566 : NVPTXInst<(outs), 5567 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5568 Int32Regs:$r), 5569 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5570 []>; 5571def SUST_B_2D_ARRAY_B64_ZERO 5572 : NVPTXInst<(outs), 5573 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5574 Int64Regs:$r), 5575 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5576 []>; 5577def SUST_B_2D_ARRAY_V2B8_ZERO 5578 : NVPTXInst<(outs), 5579 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5580 Int16Regs:$r, Int16Regs:$g), 5581 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5582 "\\{$r, $g\\};", 5583 []>; 5584def SUST_B_2D_ARRAY_V2B16_ZERO 5585 : NVPTXInst<(outs), 5586 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5587 Int16Regs:$r, Int16Regs:$g), 5588 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5589 "\\{$r, $g\\};", 5590 []>; 5591def SUST_B_2D_ARRAY_V2B32_ZERO 5592 : NVPTXInst<(outs), 5593 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5594 Int32Regs:$r, Int32Regs:$g), 5595 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5596 "\\{$r, $g\\};", 5597 []>; 5598def SUST_B_2D_ARRAY_V2B64_ZERO 5599 : NVPTXInst<(outs), 5600 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5601 Int64Regs:$r, Int64Regs:$g), 5602 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5603 "\\{$r, $g\\};", 5604 []>; 5605def SUST_B_2D_ARRAY_V4B8_ZERO 5606 : NVPTXInst<(outs), 5607 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5608 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5609 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5610 "\\{$r, $g, $b, $a\\};", 5611 []>; 5612def SUST_B_2D_ARRAY_V4B16_ZERO 5613 : NVPTXInst<(outs), 5614 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5615 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5616 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5617 "\\{$r, $g, $b, $a\\};", 5618 []>; 5619def SUST_B_2D_ARRAY_V4B32_ZERO 5620 : NVPTXInst<(outs), 5621 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5622 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5623 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5624 "\\{$r, $g, $b, $a\\};", 5625 []>; 5626 5627 5628def SUST_B_3D_B8_ZERO 5629 : NVPTXInst<(outs), 5630 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5631 Int16Regs:$r), 5632 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5633 []>; 5634def SUST_B_3D_B16_ZERO 5635 : NVPTXInst<(outs), 5636 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5637 Int16Regs:$r), 5638 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5639 []>; 5640def SUST_B_3D_B32_ZERO 5641 : NVPTXInst<(outs), 5642 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5643 Int32Regs:$r), 5644 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5645 []>; 5646def SUST_B_3D_B64_ZERO 5647 : NVPTXInst<(outs), 5648 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5649 Int64Regs:$r), 5650 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5651 []>; 5652def SUST_B_3D_V2B8_ZERO 5653 : NVPTXInst<(outs), 5654 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5655 Int16Regs:$r, Int16Regs:$g), 5656 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5657 "\\{$r, $g\\};", 5658 []>; 5659def SUST_B_3D_V2B16_ZERO 5660 : NVPTXInst<(outs), 5661 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5662 Int16Regs:$r, Int16Regs:$g), 5663 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5664 "\\{$r, $g\\};", 5665 []>; 5666def SUST_B_3D_V2B32_ZERO 5667 : NVPTXInst<(outs), 5668 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5669 Int32Regs:$r, Int32Regs:$g), 5670 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5671 "\\{$r, $g\\};", 5672 []>; 5673def SUST_B_3D_V2B64_ZERO 5674 : NVPTXInst<(outs), 5675 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5676 Int64Regs:$r, Int64Regs:$g), 5677 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5678 "\\{$r, $g\\};", 5679 []>; 5680def SUST_B_3D_V4B8_ZERO 5681 : NVPTXInst<(outs), 5682 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5683 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5684 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5685 "\\{$r, $g, $b, $a\\};", 5686 []>; 5687def SUST_B_3D_V4B16_ZERO 5688 : NVPTXInst<(outs), 5689 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5690 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5691 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5692 "\\{$r, $g, $b, $a\\};", 5693 []>; 5694def SUST_B_3D_V4B32_ZERO 5695 : NVPTXInst<(outs), 5696 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5697 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5698 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5699 "\\{$r, $g, $b, $a\\};", 5700 []>; 5701 5702 5703 5704// Formatted 5705 5706def SUST_P_1D_B8_TRAP 5707 : NVPTXInst<(outs), 5708 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5709 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5710 []>; 5711def SUST_P_1D_B16_TRAP 5712 : NVPTXInst<(outs), 5713 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5714 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5715 []>; 5716def SUST_P_1D_B32_TRAP 5717 : NVPTXInst<(outs), 5718 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5719 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5720 []>; 5721def SUST_P_1D_V2B8_TRAP 5722 : NVPTXInst<(outs), 5723 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5724 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5725 []>; 5726def SUST_P_1D_V2B16_TRAP 5727 : NVPTXInst<(outs), 5728 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5729 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5730 []>; 5731def SUST_P_1D_V2B32_TRAP 5732 : NVPTXInst<(outs), 5733 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5734 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5735 []>; 5736def SUST_P_1D_V4B8_TRAP 5737 : NVPTXInst<(outs), 5738 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5739 Int16Regs:$b, Int16Regs:$a), 5740 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5741 []>; 5742def SUST_P_1D_V4B16_TRAP 5743 : NVPTXInst<(outs), 5744 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5745 Int16Regs:$b, Int16Regs:$a), 5746 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5747 []>; 5748def SUST_P_1D_V4B32_TRAP 5749 : NVPTXInst<(outs), 5750 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5751 Int32Regs:$b, Int32Regs:$a), 5752 "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5753 []>; 5754 5755 5756def SUST_P_1D_ARRAY_B8_TRAP 5757 : NVPTXInst<(outs), 5758 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5759 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5760 []>; 5761def SUST_P_1D_ARRAY_B16_TRAP 5762 : NVPTXInst<(outs), 5763 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5764 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5765 []>; 5766def SUST_P_1D_ARRAY_B32_TRAP 5767 : NVPTXInst<(outs), 5768 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5769 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5770 []>; 5771def SUST_P_1D_ARRAY_V2B8_TRAP 5772 : NVPTXInst<(outs), 5773 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5774 Int16Regs:$g), 5775 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5776 []>; 5777def SUST_P_1D_ARRAY_V2B16_TRAP 5778 : NVPTXInst<(outs), 5779 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5780 Int16Regs:$g), 5781 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5782 []>; 5783def SUST_P_1D_ARRAY_V2B32_TRAP 5784 : NVPTXInst<(outs), 5785 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5786 Int32Regs:$g), 5787 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5788 []>; 5789def SUST_P_1D_ARRAY_V4B8_TRAP 5790 : NVPTXInst<(outs), 5791 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5792 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5793 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5794 "\\{$r, $g, $b, $a\\};", 5795 []>; 5796def SUST_P_1D_ARRAY_V4B16_TRAP 5797 : NVPTXInst<(outs), 5798 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5799 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5800 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5801 "\\{$r, $g, $b, $a\\};", 5802 []>; 5803def SUST_P_1D_ARRAY_V4B32_TRAP 5804 : NVPTXInst<(outs), 5805 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5806 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5807 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5808 "\\{$r, $g, $b, $a\\};", 5809 []>; 5810 5811 5812def SUST_P_2D_B8_TRAP 5813 : NVPTXInst<(outs), 5814 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5815 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5816 []>; 5817def SUST_P_2D_B16_TRAP 5818 : NVPTXInst<(outs), 5819 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5820 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5821 []>; 5822def SUST_P_2D_B32_TRAP 5823 : NVPTXInst<(outs), 5824 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5825 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5826 []>; 5827def SUST_P_2D_V2B8_TRAP 5828 : NVPTXInst<(outs), 5829 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5830 Int16Regs:$g), 5831 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5832 []>; 5833def SUST_P_2D_V2B16_TRAP 5834 : NVPTXInst<(outs), 5835 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5836 Int16Regs:$g), 5837 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5838 []>; 5839def SUST_P_2D_V2B32_TRAP 5840 : NVPTXInst<(outs), 5841 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5842 Int32Regs:$g), 5843 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5844 []>; 5845def SUST_P_2D_V4B8_TRAP 5846 : NVPTXInst<(outs), 5847 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5848 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5849 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5850 "\\{$r, $g, $b, $a\\};", 5851 []>; 5852def SUST_P_2D_V4B16_TRAP 5853 : NVPTXInst<(outs), 5854 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5855 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5856 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5857 "\\{$r, $g, $b, $a\\};", 5858 []>; 5859def SUST_P_2D_V4B32_TRAP 5860 : NVPTXInst<(outs), 5861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5862 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5863 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5864 "\\{$r, $g, $b, $a\\};", 5865 []>; 5866 5867 5868def SUST_P_2D_ARRAY_B8_TRAP 5869 : NVPTXInst<(outs), 5870 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5871 Int16Regs:$r), 5872 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5873 []>; 5874def SUST_P_2D_ARRAY_B16_TRAP 5875 : NVPTXInst<(outs), 5876 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5877 Int16Regs:$r), 5878 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5879 []>; 5880def SUST_P_2D_ARRAY_B32_TRAP 5881 : NVPTXInst<(outs), 5882 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5883 Int32Regs:$r), 5884 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5885 []>; 5886def SUST_P_2D_ARRAY_V2B8_TRAP 5887 : NVPTXInst<(outs), 5888 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5889 Int16Regs:$r, Int16Regs:$g), 5890 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5891 "\\{$r, $g\\};", 5892 []>; 5893def SUST_P_2D_ARRAY_V2B16_TRAP 5894 : NVPTXInst<(outs), 5895 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5896 Int16Regs:$r, Int16Regs:$g), 5897 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5898 "\\{$r, $g\\};", 5899 []>; 5900def SUST_P_2D_ARRAY_V2B32_TRAP 5901 : NVPTXInst<(outs), 5902 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5903 Int32Regs:$r, Int32Regs:$g), 5904 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5905 "\\{$r, $g\\};", 5906 []>; 5907def SUST_P_2D_ARRAY_V4B8_TRAP 5908 : NVPTXInst<(outs), 5909 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5910 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5911 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5912 "\\{$r, $g, $b, $a\\};", 5913 []>; 5914def SUST_P_2D_ARRAY_V4B16_TRAP 5915 : NVPTXInst<(outs), 5916 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5918 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5919 "\\{$r, $g, $b, $a\\};", 5920 []>; 5921def SUST_P_2D_ARRAY_V4B32_TRAP 5922 : NVPTXInst<(outs), 5923 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5924 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5925 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5926 "\\{$r, $g, $b, $a\\};", 5927 []>; 5928 5929 5930def SUST_P_3D_B8_TRAP 5931 : NVPTXInst<(outs), 5932 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5933 Int16Regs:$r), 5934 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5935 []>; 5936def SUST_P_3D_B16_TRAP 5937 : NVPTXInst<(outs), 5938 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5939 Int16Regs:$r), 5940 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5941 []>; 5942def SUST_P_3D_B32_TRAP 5943 : NVPTXInst<(outs), 5944 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5945 Int32Regs:$r), 5946 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5947 []>; 5948def SUST_P_3D_V2B8_TRAP 5949 : NVPTXInst<(outs), 5950 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5951 Int16Regs:$r, Int16Regs:$g), 5952 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5953 "\\{$r, $g\\};", 5954 []>; 5955def SUST_P_3D_V2B16_TRAP 5956 : NVPTXInst<(outs), 5957 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5958 Int16Regs:$r, Int16Regs:$g), 5959 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5960 "\\{$r, $g\\};", 5961 []>; 5962def SUST_P_3D_V2B32_TRAP 5963 : NVPTXInst<(outs), 5964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5965 Int32Regs:$r, Int32Regs:$g), 5966 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5967 "\\{$r, $g\\};", 5968 []>; 5969def SUST_P_3D_V4B8_TRAP 5970 : NVPTXInst<(outs), 5971 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5972 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5973 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5974 "\\{$r, $g, $b, $a\\};", 5975 []>; 5976def SUST_P_3D_V4B16_TRAP 5977 : NVPTXInst<(outs), 5978 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5979 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5980 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5981 "\\{$r, $g, $b, $a\\};", 5982 []>; 5983def SUST_P_3D_V4B32_TRAP 5984 : NVPTXInst<(outs), 5985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5986 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5987 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5988 "\\{$r, $g, $b, $a\\};", 5989 []>; 5990} 5991 5992// Surface store instruction patterns 5993// I'm not sure why we can't just include these in the instruction definitions, 5994// but TableGen complains of type errors :( 5995 5996// .clamp variant 5997def : Pat<(int_nvvm_sust_b_1d_i8_clamp 5998 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5999 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6000 6001def : Pat<(int_nvvm_sust_b_1d_i16_clamp 6002 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6003 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6004 6005def : Pat<(int_nvvm_sust_b_1d_i32_clamp 6006 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6007 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6008 6009def : Pat<(int_nvvm_sust_b_1d_i64_clamp 6010 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6011 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6012 6013def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 6014 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6015 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6016 Int16Regs:$r, Int16Regs:$g)>; 6017 6018def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 6019 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6020 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6021 Int16Regs:$r, Int16Regs:$g)>; 6022 6023def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 6024 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6025 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6026 Int32Regs:$r, Int32Regs:$g)>; 6027 6028def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 6029 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6030 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, 6031 Int64Regs:$r, Int64Regs:$g)>; 6032 6033def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 6034 Int64Regs:$s, Int32Regs:$x, 6035 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6036 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6037 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6038 6039def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 6040 Int64Regs:$s, Int32Regs:$x, 6041 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6042 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6043 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6044 6045def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 6046 Int64Regs:$s, Int32Regs:$x, 6047 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6048 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6049 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6050 6051 6052 6053def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 6054 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6055 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6056 Int16Regs:$r)>; 6057 6058def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 6059 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6060 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6061 Int16Regs:$r)>; 6062 6063def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 6064 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6065 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6066 Int32Regs:$r)>; 6067 6068def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 6069 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6070 (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6071 Int64Regs:$r)>; 6072 6073def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 6074 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6075 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6076 Int16Regs:$r, Int16Regs:$g)>; 6077 6078def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 6079 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6080 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6081 Int16Regs:$r, Int16Regs:$g)>; 6082 6083def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 6084 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6085 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6086 Int32Regs:$r, Int32Regs:$g)>; 6087 6088def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 6089 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6090 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6091 Int64Regs:$r, Int64Regs:$g)>; 6092 6093def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 6094 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6095 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6096 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6097 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6098 6099def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 6100 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6101 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6102 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6103 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6104 6105def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 6106 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6107 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6108 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6109 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6110 6111 6112 6113def : Pat<(int_nvvm_sust_b_2d_i8_clamp 6114 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6115 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6116 Int16Regs:$r)>; 6117 6118def : Pat<(int_nvvm_sust_b_2d_i16_clamp 6119 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6120 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6121 Int16Regs:$r)>; 6122 6123def : Pat<(int_nvvm_sust_b_2d_i32_clamp 6124 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6125 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6126 Int32Regs:$r)>; 6127 6128def : Pat<(int_nvvm_sust_b_2d_i64_clamp 6129 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6130 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6131 Int64Regs:$r)>; 6132 6133def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 6134 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6135 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6136 Int16Regs:$r, Int16Regs:$g)>; 6137 6138def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 6139 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6140 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6141 Int16Regs:$r, Int16Regs:$g)>; 6142 6143def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 6144 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6145 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6146 Int32Regs:$r, Int32Regs:$g)>; 6147 6148def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 6149 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6150 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6151 Int64Regs:$r, Int64Regs:$g)>; 6152 6153def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 6154 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6155 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6156 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6157 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6158 6159def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 6160 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6161 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6162 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6163 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6164 6165def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 6166 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6167 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6168 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6169 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6170 6171 6172 6173def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 6174 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6175 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, 6176 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6177 Int16Regs:$r)>; 6178 6179def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 6180 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6181 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, 6182 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6183 Int16Regs:$r)>; 6184 6185def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 6186 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6187 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, 6188 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6189 Int32Regs:$r)>; 6190 6191def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 6192 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6193 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, 6194 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6195 Int64Regs:$r)>; 6196 6197def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 6198 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6199 Int16Regs:$r, Int16Regs:$g), 6200 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, 6201 Int32Regs:$x, Int32Regs:$y, 6202 Int16Regs:$r, Int16Regs:$g)>; 6203 6204def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 6205 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6206 Int16Regs:$r, Int16Regs:$g), 6207 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, 6208 Int32Regs:$x, Int32Regs:$y, 6209 Int16Regs:$r, Int16Regs:$g)>; 6210 6211def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 6212 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6213 Int32Regs:$g), 6214 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6215 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6216 6217def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 6218 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6219 Int64Regs:$g), 6220 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, 6221 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6222 6223def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 6224 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6225 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6226 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, 6227 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6228 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6229 6230def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 6231 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6232 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6233 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, 6234 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6235 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6236 6237def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 6238 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6239 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6240 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6241 Int32Regs:$x, Int32Regs:$y, 6242 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6243 6244 6245 6246def : Pat<(int_nvvm_sust_b_3d_i8_clamp 6247 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6248 Int16Regs:$r), 6249 (SUST_B_3D_B8_CLAMP Int64Regs:$s, 6250 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6251 Int16Regs:$r)>; 6252 6253def : Pat<(int_nvvm_sust_b_3d_i16_clamp 6254 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6255 Int16Regs:$r), 6256 (SUST_B_3D_B16_CLAMP Int64Regs:$s, 6257 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6258 Int16Regs:$r)>; 6259 6260def : Pat<(int_nvvm_sust_b_3d_i32_clamp 6261 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6262 Int32Regs:$r), 6263 (SUST_B_3D_B32_CLAMP Int64Regs:$s, 6264 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6265 Int32Regs:$r)>; 6266 6267def : Pat<(int_nvvm_sust_b_3d_i64_clamp 6268 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6269 Int64Regs:$r), 6270 (SUST_B_3D_B64_CLAMP Int64Regs:$s, 6271 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6272 Int64Regs:$r)>; 6273 6274def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp 6275 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6276 Int16Regs:$r, Int16Regs:$g), 6277 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, 6278 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6279 Int16Regs:$r, Int16Regs:$g)>; 6280 6281def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp 6282 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6283 Int16Regs:$r, Int16Regs:$g), 6284 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, 6285 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6286 Int16Regs:$r, Int16Regs:$g)>; 6287 6288def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 6289 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6290 Int32Regs:$r, Int32Regs:$g), 6291 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, 6292 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6293 Int32Regs:$r, Int32Regs:$g)>; 6294 6295def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 6296 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6297 Int64Regs:$r, Int64Regs:$g), 6298 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, 6299 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6300 Int64Regs:$r, Int64Regs:$g)>; 6301 6302def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 6303 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6304 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6305 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, 6306 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6307 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6308 6309def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 6310 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6311 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6312 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, 6313 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6314 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6315 6316def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 6317 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6318 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6319 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, 6320 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6321 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6322 6323 6324// .trap variant 6325def : Pat<(int_nvvm_sust_b_1d_i8_trap 6326 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6327 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6328 6329def : Pat<(int_nvvm_sust_b_1d_i16_trap 6330 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6331 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6332 6333def : Pat<(int_nvvm_sust_b_1d_i32_trap 6334 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6335 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6336 6337def : Pat<(int_nvvm_sust_b_1d_i64_trap 6338 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6339 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6340 6341def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 6342 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6343 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, 6344 Int16Regs:$r, Int16Regs:$g)>; 6345 6346def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 6347 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6348 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, 6349 Int16Regs:$r, Int16Regs:$g)>; 6350 6351def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 6352 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6353 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, 6354 Int32Regs:$r, Int32Regs:$g)>; 6355 6356def : Pat<(int_nvvm_sust_b_1d_v2i64_trap 6357 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6358 (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, 6359 Int64Regs:$r, Int64Regs:$g)>; 6360 6361def : Pat<(int_nvvm_sust_b_1d_v4i8_trap 6362 Int64Regs:$s, Int32Regs:$x, 6363 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6364 (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, 6365 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6366 6367def : Pat<(int_nvvm_sust_b_1d_v4i16_trap 6368 Int64Regs:$s, Int32Regs:$x, 6369 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6370 (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, 6371 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6372 6373def : Pat<(int_nvvm_sust_b_1d_v4i32_trap 6374 Int64Regs:$s, Int32Regs:$x, 6375 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6376 (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, 6377 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6378 6379 6380 6381def : Pat<(int_nvvm_sust_b_1d_array_i8_trap 6382 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6383 (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6384 Int16Regs:$r)>; 6385 6386def : Pat<(int_nvvm_sust_b_1d_array_i16_trap 6387 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6388 (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6389 Int16Regs:$r)>; 6390 6391def : Pat<(int_nvvm_sust_b_1d_array_i32_trap 6392 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6393 (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6394 Int32Regs:$r)>; 6395 6396def : Pat<(int_nvvm_sust_b_1d_array_i64_trap 6397 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6398 (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6399 Int64Regs:$r)>; 6400 6401def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap 6402 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6403 (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6404 Int16Regs:$r, Int16Regs:$g)>; 6405 6406def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 6407 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6408 (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6409 Int16Regs:$r, Int16Regs:$g)>; 6410 6411def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 6412 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6413 (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6414 Int32Regs:$r, Int32Regs:$g)>; 6415 6416def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 6417 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6418 (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6419 Int64Regs:$r, Int64Regs:$g)>; 6420 6421def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 6422 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6423 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6424 (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6425 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6426 6427def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 6428 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6429 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6430 (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6431 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6432 6433def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 6434 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6435 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6436 (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6437 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6438 6439 6440 6441def : Pat<(int_nvvm_sust_b_2d_i8_trap 6442 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6443 (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6444 Int16Regs:$r)>; 6445 6446def : Pat<(int_nvvm_sust_b_2d_i16_trap 6447 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6448 (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6449 Int16Regs:$r)>; 6450 6451def : Pat<(int_nvvm_sust_b_2d_i32_trap 6452 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6453 (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6454 Int32Regs:$r)>; 6455 6456def : Pat<(int_nvvm_sust_b_2d_i64_trap 6457 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6458 (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6459 Int64Regs:$r)>; 6460 6461def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 6462 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6463 (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6464 Int16Regs:$r, Int16Regs:$g)>; 6465 6466def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 6467 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6468 (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6469 Int16Regs:$r, Int16Regs:$g)>; 6470 6471def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 6472 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6473 (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6474 Int32Regs:$r, Int32Regs:$g)>; 6475 6476def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 6477 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6478 (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6479 Int64Regs:$r, Int64Regs:$g)>; 6480 6481def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 6482 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6483 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6484 (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6485 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6486 6487def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 6488 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6489 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6490 (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6491 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6492 6493def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 6494 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6495 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6496 (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6497 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6498 6499 6500 6501def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 6502 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6503 (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, 6504 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6505 Int16Regs:$r)>; 6506 6507def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 6508 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6509 (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, 6510 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6511 Int16Regs:$r)>; 6512 6513def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 6514 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6515 (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, 6516 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6517 Int32Regs:$r)>; 6518 6519def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 6520 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6521 (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s, 6522 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6523 Int64Regs:$r)>; 6524 6525def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 6526 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6527 Int16Regs:$r, Int16Regs:$g), 6528 (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, 6529 Int32Regs:$x, Int32Regs:$y, 6530 Int16Regs:$r, Int16Regs:$g)>; 6531 6532def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 6533 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6534 Int16Regs:$r, Int16Regs:$g), 6535 (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 6536 Int32Regs:$x, Int32Regs:$y, 6537 Int16Regs:$r, Int16Regs:$g)>; 6538 6539def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 6540 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6541 Int32Regs:$g), 6542 (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, 6543 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6544 6545def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 6546 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6547 Int64Regs:$g), 6548 (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, 6549 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6550 6551def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 6552 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6553 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6554 (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, 6555 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6556 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6557 6558def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 6559 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6560 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6561 (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, 6562 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6563 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6564 6565def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 6566 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6567 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6568 (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, 6569 Int32Regs:$x, Int32Regs:$y, 6570 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6571 6572 6573 6574def : Pat<(int_nvvm_sust_b_3d_i8_trap 6575 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6576 Int16Regs:$r), 6577 (SUST_B_3D_B8_TRAP Int64Regs:$s, 6578 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6579 Int16Regs:$r)>; 6580 6581def : Pat<(int_nvvm_sust_b_3d_i16_trap 6582 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6583 Int16Regs:$r), 6584 (SUST_B_3D_B16_TRAP Int64Regs:$s, 6585 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6586 Int16Regs:$r)>; 6587 6588def : Pat<(int_nvvm_sust_b_3d_i32_trap 6589 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6590 Int32Regs:$r), 6591 (SUST_B_3D_B32_TRAP Int64Regs:$s, 6592 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6593 Int32Regs:$r)>; 6594 6595def : Pat<(int_nvvm_sust_b_3d_i64_trap 6596 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6597 Int64Regs:$r), 6598 (SUST_B_3D_B64_TRAP Int64Regs:$s, 6599 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6600 Int64Regs:$r)>; 6601 6602def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 6603 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6604 Int16Regs:$r, Int16Regs:$g), 6605 (SUST_B_3D_V2B8_TRAP Int64Regs:$s, 6606 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6607 Int16Regs:$r, Int16Regs:$g)>; 6608 6609def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 6610 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6611 Int16Regs:$r, Int16Regs:$g), 6612 (SUST_B_3D_V2B16_TRAP Int64Regs:$s, 6613 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6614 Int16Regs:$r, Int16Regs:$g)>; 6615 6616def : Pat<(int_nvvm_sust_b_3d_v2i32_trap 6617 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6618 Int32Regs:$r, Int32Regs:$g), 6619 (SUST_B_3D_V2B32_TRAP Int64Regs:$s, 6620 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6621 Int32Regs:$r, Int32Regs:$g)>; 6622 6623def : Pat<(int_nvvm_sust_b_3d_v2i64_trap 6624 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6625 Int64Regs:$r, Int64Regs:$g), 6626 (SUST_B_3D_V2B64_TRAP Int64Regs:$s, 6627 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6628 Int64Regs:$r, Int64Regs:$g)>; 6629 6630def : Pat<(int_nvvm_sust_b_3d_v4i8_trap 6631 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6632 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6633 (SUST_B_3D_V4B8_TRAP Int64Regs:$s, 6634 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6635 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6636 6637def : Pat<(int_nvvm_sust_b_3d_v4i16_trap 6638 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6639 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6640 (SUST_B_3D_V4B16_TRAP Int64Regs:$s, 6641 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6642 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6643 6644def : Pat<(int_nvvm_sust_b_3d_v4i32_trap 6645 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6646 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6647 (SUST_B_3D_V4B32_TRAP Int64Regs:$s, 6648 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6649 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6650 6651 6652// .zero variant 6653def : Pat<(int_nvvm_sust_b_1d_i8_zero 6654 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6655 (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6656 6657def : Pat<(int_nvvm_sust_b_1d_i16_zero 6658 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6659 (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6660 6661def : Pat<(int_nvvm_sust_b_1d_i32_zero 6662 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6663 (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6664 6665def : Pat<(int_nvvm_sust_b_1d_i64_zero 6666 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6667 (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6668 6669def : Pat<(int_nvvm_sust_b_1d_v2i8_zero 6670 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6671 (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, 6672 Int16Regs:$r, Int16Regs:$g)>; 6673 6674def : Pat<(int_nvvm_sust_b_1d_v2i16_zero 6675 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6676 (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, 6677 Int16Regs:$r, Int16Regs:$g)>; 6678 6679def : Pat<(int_nvvm_sust_b_1d_v2i32_zero 6680 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6681 (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, 6682 Int32Regs:$r, Int32Regs:$g)>; 6683 6684def : Pat<(int_nvvm_sust_b_1d_v2i64_zero 6685 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6686 (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, 6687 Int64Regs:$r, Int64Regs:$g)>; 6688 6689def : Pat<(int_nvvm_sust_b_1d_v4i8_zero 6690 Int64Regs:$s, Int32Regs:$x, 6691 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6692 (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, 6693 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6694 6695def : Pat<(int_nvvm_sust_b_1d_v4i16_zero 6696 Int64Regs:$s, Int32Regs:$x, 6697 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6698 (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, 6699 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6700 6701def : Pat<(int_nvvm_sust_b_1d_v4i32_zero 6702 Int64Regs:$s, Int32Regs:$x, 6703 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6704 (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, 6705 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6706 6707 6708 6709def : Pat<(int_nvvm_sust_b_1d_array_i8_zero 6710 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6711 (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6712 Int16Regs:$r)>; 6713 6714def : Pat<(int_nvvm_sust_b_1d_array_i16_zero 6715 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6716 (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6717 Int16Regs:$r)>; 6718 6719def : Pat<(int_nvvm_sust_b_1d_array_i32_zero 6720 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6721 (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6722 Int32Regs:$r)>; 6723 6724def : Pat<(int_nvvm_sust_b_1d_array_i64_zero 6725 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6726 (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6727 Int64Regs:$r)>; 6728 6729def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero 6730 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6731 (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6732 Int16Regs:$r, Int16Regs:$g)>; 6733 6734def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero 6735 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6736 (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6737 Int16Regs:$r, Int16Regs:$g)>; 6738 6739def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero 6740 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6741 (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6742 Int32Regs:$r, Int32Regs:$g)>; 6743 6744def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero 6745 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6746 (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6747 Int64Regs:$r, Int64Regs:$g)>; 6748 6749def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero 6750 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6751 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6752 (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6753 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6754 6755def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero 6756 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6757 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6758 (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6759 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6760 6761def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero 6762 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6763 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6764 (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6765 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6766 6767 6768 6769def : Pat<(int_nvvm_sust_b_2d_i8_zero 6770 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6771 (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6772 Int16Regs:$r)>; 6773 6774def : Pat<(int_nvvm_sust_b_2d_i16_zero 6775 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6776 (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6777 Int16Regs:$r)>; 6778 6779def : Pat<(int_nvvm_sust_b_2d_i32_zero 6780 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6781 (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6782 Int32Regs:$r)>; 6783 6784def : Pat<(int_nvvm_sust_b_2d_i64_zero 6785 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6786 (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6787 Int64Regs:$r)>; 6788 6789def : Pat<(int_nvvm_sust_b_2d_v2i8_zero 6790 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6791 (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6792 Int16Regs:$r, Int16Regs:$g)>; 6793 6794def : Pat<(int_nvvm_sust_b_2d_v2i16_zero 6795 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6796 (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6797 Int16Regs:$r, Int16Regs:$g)>; 6798 6799def : Pat<(int_nvvm_sust_b_2d_v2i32_zero 6800 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6801 (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6802 Int32Regs:$r, Int32Regs:$g)>; 6803 6804def : Pat<(int_nvvm_sust_b_2d_v2i64_zero 6805 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6806 (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6807 Int64Regs:$r, Int64Regs:$g)>; 6808 6809def : Pat<(int_nvvm_sust_b_2d_v4i8_zero 6810 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6811 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6812 (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6813 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6814 6815def : Pat<(int_nvvm_sust_b_2d_v4i16_zero 6816 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6817 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6818 (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6819 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6820 6821def : Pat<(int_nvvm_sust_b_2d_v4i32_zero 6822 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6823 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6824 (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6825 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6826 6827 6828 6829def : Pat<(int_nvvm_sust_b_2d_array_i8_zero 6830 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6831 (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s, 6832 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6833 Int16Regs:$r)>; 6834 6835def : Pat<(int_nvvm_sust_b_2d_array_i16_zero 6836 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6837 (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s, 6838 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6839 Int16Regs:$r)>; 6840 6841def : Pat<(int_nvvm_sust_b_2d_array_i32_zero 6842 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6843 (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s, 6844 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6845 Int32Regs:$r)>; 6846 6847def : Pat<(int_nvvm_sust_b_2d_array_i64_zero 6848 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6849 (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s, 6850 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6851 Int64Regs:$r)>; 6852 6853def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero 6854 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6855 Int16Regs:$r, Int16Regs:$g), 6856 (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, 6857 Int32Regs:$x, Int32Regs:$y, 6858 Int16Regs:$r, Int16Regs:$g)>; 6859 6860def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero 6861 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6862 Int16Regs:$r, Int16Regs:$g), 6863 (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, 6864 Int32Regs:$x, Int32Regs:$y, 6865 Int16Regs:$r, Int16Regs:$g)>; 6866 6867def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero 6868 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6869 Int32Regs:$g), 6870 (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, 6871 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6872 6873def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero 6874 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6875 Int64Regs:$g), 6876 (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, 6877 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6878 6879def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero 6880 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6881 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6882 (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s, 6883 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6884 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6885 6886def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero 6887 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6888 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6889 (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s, 6890 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6891 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6892 6893def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero 6894 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6895 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6896 (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, 6897 Int32Regs:$x, Int32Regs:$y, 6898 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6899 6900 6901 6902def : Pat<(int_nvvm_sust_b_3d_i8_zero 6903 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6904 Int16Regs:$r), 6905 (SUST_B_3D_B8_ZERO Int64Regs:$s, 6906 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6907 Int16Regs:$r)>; 6908 6909def : Pat<(int_nvvm_sust_b_3d_i16_zero 6910 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6911 Int16Regs:$r), 6912 (SUST_B_3D_B16_ZERO Int64Regs:$s, 6913 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6914 Int16Regs:$r)>; 6915 6916def : Pat<(int_nvvm_sust_b_3d_i32_zero 6917 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6918 Int32Regs:$r), 6919 (SUST_B_3D_B32_ZERO Int64Regs:$s, 6920 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6921 Int32Regs:$r)>; 6922 6923def : Pat<(int_nvvm_sust_b_3d_i64_zero 6924 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6925 Int64Regs:$r), 6926 (SUST_B_3D_B64_ZERO Int64Regs:$s, 6927 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6928 Int64Regs:$r)>; 6929 6930def : Pat<(int_nvvm_sust_b_3d_v2i8_zero 6931 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6932 Int16Regs:$r, Int16Regs:$g), 6933 (SUST_B_3D_V2B8_ZERO Int64Regs:$s, 6934 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6935 Int16Regs:$r, Int16Regs:$g)>; 6936 6937def : Pat<(int_nvvm_sust_b_3d_v2i16_zero 6938 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6939 Int16Regs:$r, Int16Regs:$g), 6940 (SUST_B_3D_V2B16_ZERO Int64Regs:$s, 6941 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6942 Int16Regs:$r, Int16Regs:$g)>; 6943 6944def : Pat<(int_nvvm_sust_b_3d_v2i32_zero 6945 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6946 Int32Regs:$r, Int32Regs:$g), 6947 (SUST_B_3D_V2B32_ZERO Int64Regs:$s, 6948 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6949 Int32Regs:$r, Int32Regs:$g)>; 6950 6951def : Pat<(int_nvvm_sust_b_3d_v2i64_zero 6952 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6953 Int64Regs:$r, Int64Regs:$g), 6954 (SUST_B_3D_V2B64_ZERO Int64Regs:$s, 6955 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6956 Int64Regs:$r, Int64Regs:$g)>; 6957 6958def : Pat<(int_nvvm_sust_b_3d_v4i8_zero 6959 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6960 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6961 (SUST_B_3D_V4B8_ZERO Int64Regs:$s, 6962 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6964 6965def : Pat<(int_nvvm_sust_b_3d_v4i16_zero 6966 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6967 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6968 (SUST_B_3D_V4B16_ZERO Int64Regs:$s, 6969 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6970 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6971 6972def : Pat<(int_nvvm_sust_b_3d_v4i32_zero 6973 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6974 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6975 (SUST_B_3D_V4B32_ZERO Int64Regs:$s, 6976 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6977 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6978 6979 6980 6981 6982def : Pat<(int_nvvm_sust_p_1d_i8_trap 6983 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6984 (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6985 6986def : Pat<(int_nvvm_sust_p_1d_i16_trap 6987 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6988 (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6989 6990def : Pat<(int_nvvm_sust_p_1d_i32_trap 6991 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6992 (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6993 6994def : Pat<(int_nvvm_sust_p_1d_v2i8_trap 6995 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6996 (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, 6997 Int16Regs:$r, Int16Regs:$g)>; 6998 6999def : Pat<(int_nvvm_sust_p_1d_v2i16_trap 7000 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7001 (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, 7002 Int16Regs:$r, Int16Regs:$g)>; 7003 7004def : Pat<(int_nvvm_sust_p_1d_v2i32_trap 7005 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 7006 (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, 7007 Int32Regs:$r, Int32Regs:$g)>; 7008 7009def : Pat<(int_nvvm_sust_p_1d_v4i8_trap 7010 Int64Regs:$s, Int32Regs:$x, 7011 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7012 (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, 7013 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7014 7015def : Pat<(int_nvvm_sust_p_1d_v4i16_trap 7016 Int64Regs:$s, Int32Regs:$x, 7017 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7018 (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, 7019 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7020 7021def : Pat<(int_nvvm_sust_p_1d_v4i32_trap 7022 Int64Regs:$s, Int32Regs:$x, 7023 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7024 (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, 7025 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7026 7027 7028 7029def : Pat<(int_nvvm_sust_p_1d_array_i8_trap 7030 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 7031 (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7032 Int16Regs:$r)>; 7033 7034def : Pat<(int_nvvm_sust_p_1d_array_i16_trap 7035 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 7036 (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7037 Int16Regs:$r)>; 7038 7039def : Pat<(int_nvvm_sust_p_1d_array_i32_trap 7040 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 7041 (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7042 Int32Regs:$r)>; 7043 7044def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap 7045 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7046 (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7047 Int16Regs:$r, Int16Regs:$g)>; 7048 7049def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap 7050 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 7051 (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7052 Int16Regs:$r, Int16Regs:$g)>; 7053 7054def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap 7055 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 7056 (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7057 Int32Regs:$r, Int32Regs:$g)>; 7058 7059def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap 7060 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7061 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7062 (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7063 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7064 7065def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap 7066 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7067 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7068 (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7069 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7070 7071def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap 7072 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7073 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7074 (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 7075 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7076 7077 7078 7079def : Pat<(int_nvvm_sust_p_2d_i8_trap 7080 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7081 (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7082 Int16Regs:$r)>; 7083 7084def : Pat<(int_nvvm_sust_p_2d_i16_trap 7085 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7086 (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7087 Int16Regs:$r)>; 7088 7089def : Pat<(int_nvvm_sust_p_2d_i32_trap 7090 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7091 (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7092 Int32Regs:$r)>; 7093 7094def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 7095 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7096 (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7097 Int16Regs:$r, Int16Regs:$g)>; 7098 7099def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 7100 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 7101 (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7102 Int16Regs:$r, Int16Regs:$g)>; 7103 7104def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 7105 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 7106 (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7107 Int32Regs:$r, Int32Regs:$g)>; 7108 7109def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 7110 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7111 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7112 (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7113 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7114 7115def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 7116 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7117 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7118 (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7119 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7120 7121def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 7122 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7123 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7124 (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 7125 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7126 7127 7128 7129def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 7130 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7131 (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, 7132 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7133 Int16Regs:$r)>; 7134 7135def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 7136 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 7137 (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, 7138 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7139 Int16Regs:$r)>; 7140 7141def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 7142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 7143 (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, 7144 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7145 Int32Regs:$r)>; 7146 7147def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 7148 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7149 Int16Regs:$r, Int16Regs:$g), 7150 (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, 7151 Int32Regs:$x, Int32Regs:$y, 7152 Int16Regs:$r, Int16Regs:$g)>; 7153 7154def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 7155 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7156 Int16Regs:$r, Int16Regs:$g), 7157 (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 7158 Int32Regs:$x, Int32Regs:$y, 7159 Int16Regs:$r, Int16Regs:$g)>; 7160 7161def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 7162 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 7163 Int32Regs:$g), 7164 (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, 7165 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 7166 7167def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 7168 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7169 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7170 (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, 7171 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7172 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7173 7174def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 7175 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7176 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7177 (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, 7178 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7179 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7180 7181def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 7182 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 7183 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7184 (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, 7185 Int32Regs:$x, Int32Regs:$y, 7186 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7187 7188 7189 7190def : Pat<(int_nvvm_sust_p_3d_i8_trap 7191 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7192 Int16Regs:$r), 7193 (SUST_P_3D_B8_TRAP Int64Regs:$s, 7194 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7195 Int16Regs:$r)>; 7196 7197def : Pat<(int_nvvm_sust_p_3d_i16_trap 7198 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7199 Int16Regs:$r), 7200 (SUST_P_3D_B16_TRAP Int64Regs:$s, 7201 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7202 Int16Regs:$r)>; 7203 7204def : Pat<(int_nvvm_sust_p_3d_i32_trap 7205 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7206 Int32Regs:$r), 7207 (SUST_P_3D_B32_TRAP Int64Regs:$s, 7208 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7209 Int32Regs:$r)>; 7210 7211def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 7212 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7213 Int16Regs:$r, Int16Regs:$g), 7214 (SUST_P_3D_V2B8_TRAP Int64Regs:$s, 7215 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7216 Int16Regs:$r, Int16Regs:$g)>; 7217 7218def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 7219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7220 Int16Regs:$r, Int16Regs:$g), 7221 (SUST_P_3D_V2B16_TRAP Int64Regs:$s, 7222 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7223 Int16Regs:$r, Int16Regs:$g)>; 7224 7225def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 7226 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7227 Int32Regs:$r, Int32Regs:$g), 7228 (SUST_P_3D_V2B32_TRAP Int64Regs:$s, 7229 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7230 Int32Regs:$r, Int32Regs:$g)>; 7231 7232def : Pat<(int_nvvm_sust_p_3d_v4i8_trap 7233 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7234 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7235 (SUST_P_3D_V4B8_TRAP Int64Regs:$s, 7236 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7237 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7238 7239def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 7240 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7241 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 7242 (SUST_P_3D_V4B16_TRAP Int64Regs:$s, 7243 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7244 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 7245 7246def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 7247 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7248 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 7249 (SUST_P_3D_V4B32_TRAP Int64Regs:$s, 7250 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 7251 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 7252 7253//----------------------------------- 7254// Read Special Registers 7255//----------------------------------- 7256 7257class PTX_READ_SREG_R64<string regname, Intrinsic intop> 7258 : NVPTXInst<(outs Int64Regs:$d), (ins), 7259 !strconcat("mov.u64 \t$d, %", regname, ";"), 7260 [(set Int64Regs:$d, (intop))]>; 7261 7262class PTX_READ_SREG_R32<string regname, Intrinsic intop> 7263 : NVPTXInst<(outs Int32Regs:$d), (ins), 7264 !strconcat("mov.u32 \t$d, %", regname, ";"), 7265 [(set Int32Regs:$d, (intop))]>; 7266 7267// TODO Add read vector-version of special registers 7268 7269def INT_PTX_SREG_TID_X : 7270 PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>; 7271def INT_PTX_SREG_TID_Y : 7272 PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>; 7273def INT_PTX_SREG_TID_Z : 7274 PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>; 7275def INT_PTX_SREG_TID_W : 7276 PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>; 7277 7278def INT_PTX_SREG_NTID_X : 7279 PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>; 7280def INT_PTX_SREG_NTID_Y : 7281 PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>; 7282def INT_PTX_SREG_NTID_Z : 7283 PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>; 7284def INT_PTX_SREG_NTID_W : 7285 PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>; 7286 7287def INT_PTX_SREG_LANEID : 7288 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 7289def INT_PTX_SREG_WARPID : 7290 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 7291def INT_PTX_SREG_NWARPID : 7292 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 7293 7294def INT_PTX_SREG_CTAID_X : 7295 PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>; 7296def INT_PTX_SREG_CTAID_Y : 7297 PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>; 7298def INT_PTX_SREG_CTAID_Z : 7299 PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>; 7300def INT_PTX_SREG_CTAID_W : 7301 PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>; 7302 7303def INT_PTX_SREG_NCTAID_X : 7304 PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>; 7305def INT_PTX_SREG_NCTAID_Y : 7306 PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>; 7307def INT_PTX_SREG_NCTAID_Z : 7308 PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>; 7309def INT_PTX_SREG_NCTAID_W : 7310 PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>; 7311 7312def INT_PTX_SREG_SMID : 7313 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 7314def INT_PTX_SREG_NSMID : 7315 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 7316def INT_PTX_SREG_GRIDID : 7317 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 7318 7319def INT_PTX_SREG_LANEMASK_EQ : 7320 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 7321def INT_PTX_SREG_LANEMASK_LE : 7322 PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>; 7323def INT_PTX_SREG_LANEMASK_LT : 7324 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 7325def INT_PTX_SREG_LANEMASK_GE : 7326 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 7327def INT_PTX_SREG_LANEMASK_GT : 7328 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 7329 7330def INT_PTX_SREG_CLOCK : 7331 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 7332def INT_PTX_SREG_CLOCK64 : 7333 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 7334 7335def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 7336def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 7337def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 7338def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 7339 7340// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 7341// handle the constant. 7342def INT_PTX_SREG_WARPSIZE : 7343 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 7344 [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 7345 7346// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 7347// In addition to target-independent fields provided by WMMA_REGS, it adds 7348// the fields commonly used to implement specific PTX instruction -- register 7349// types and names, constraints, parts of assembly, etc. 7350class WMMA_REGINFO<WMMA_REGS r> 7351 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> { 7352 // NVPTX register types used to carry fragment data. 7353 NVPTXRegClass regclass = !cond( 7354 !eq(ptx_elt_type, "f16") : Float16x2Regs, 7355 !eq(ptx_elt_type, "f32") : Float32Regs, 7356 !eq(ptx_elt_type, "s32") : Int32Regs, 7357 !eq(ptx_elt_type, "s8") : Int32Regs, 7358 !eq(ptx_elt_type, "u8") : Int32Regs, 7359 !eq(ptx_elt_type, "s4") : Int32Regs, 7360 !eq(ptx_elt_type, "u4") : Int32Regs, 7361 !eq(ptx_elt_type, "b1") : Int32Regs); 7362 7363 // Instruction input/output arguments for the fragment. 7364 list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass); 7365 7366 // List of register names for the fragment -- ["ra0", "ra1",...] 7367 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret; 7368 7369 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction. 7370 string regstring = "{{$" # !head(reg_names) 7371 # !foldl("", !tail(reg_names), a, b, 7372 !strconcat(a, ", $", b)) 7373 # "}}"; 7374 7375 // Predicates for particular fragment variant. Technically those are 7376 // per-instruction predicates, but currently all fragments that can be used in 7377 // a given instruction are subject to the same constraints, so an instruction 7378 // can use predicates from any of its fragments. If/when this is no 7379 // longer the case, we can concat all per-fragment predicates to enforce that 7380 // all fragments of the instruction are viable. 7381 list<Predicate> Predicates = !cond( 7382 // fp16 -> fp16/fp32 @ m16n16k16 7383 !and(!eq(geom, "m16n16k16"), 7384 !or(!eq(ptx_elt_type, "f16"), 7385 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60], 7386 7387 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16 7388 !and(!or(!eq(geom, "m8n32k16"), 7389 !eq(geom, "m32n8k16")), 7390 !or(!eq(ptx_elt_type, "f16"), 7391 !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61], 7392 7393 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 7394 !and(!or(!eq(geom,"m16n16k16"), 7395 !eq(geom,"m8n32k16"), 7396 !eq(geom,"m32n8k16")), 7397 !or(!eq(ptx_elt_type, "u8"), 7398 !eq(ptx_elt_type, "s8"), 7399 !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63], 7400 7401 // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128(b1) 7402 !or(!eq(geom,"m8n8k128"), 7403 !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63], 7404 7405 !eq(geom, "m8n8k4") : [hasSM70, hasPTX64]); 7406 7407 // template DAGs for instruction inputs/output. 7408 dag Outs = !dag(outs, ptx_regs, reg_names); 7409 dag Ins = !dag(ins, ptx_regs, reg_names); 7410} 7411 7412// Convert dag of arguments into a dag to match given intrinsic. 7413class BuildPatternI<Intrinsic Intr, dag Ins> { 7414 // Build a dag pattern that matches the intrinsic call. 7415 dag ret = !foreach(tmp, Ins, 7416 !subst(imem, ADDRvar, 7417 !subst(MEMri64, ADDRri64, 7418 !subst(MEMri, ADDRri, 7419 !subst(ins, Intr, tmp))))); 7420} 7421 7422// Same as above, but uses PatFrag instead of an Intrinsic. 7423class BuildPatternPF<PatFrag Intr, dag Ins> { 7424 // Build a dag pattern that matches the intrinsic call. 7425 dag ret = !foreach(tmp, Ins, 7426 !subst(imem, ADDRvar, 7427 !subst(MEMri64, ADDRri64, 7428 !subst(MEMri, ADDRri, 7429 !subst(ins, Intr, tmp))))); 7430} 7431 7432// Common WMMA-related fields used for building patterns for all MMA instructions. 7433class WMMA_INSTR<string _Intr, list<dag> _Args> 7434 : NVPTXInst<(outs), (ins), "?", []> { 7435 Intrinsic Intr = !cast<Intrinsic>(_Intr); 7436 // Concatenate all arguments into a single dag. 7437 dag Args = !foldl((ins), _Args, a, b, !con(a,b)); 7438 // Pre-build the pattern to match (intrinsic arg0, arg1, ...). 7439 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret; 7440} 7441 7442// 7443// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7444// 7445 7446class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride, 7447 DAGOperand SrcOp> 7448 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record, 7449 [!con((ins SrcOp:$src), 7450 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 7451 Requires<Frag.Predicates> { 7452 // Load/store intrinsics are overloaded on pointer's address space. 7453 // To match the right intrinsic, we need to build AS-constrained PatFrag. 7454 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 7455 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); 7456 // Build PatFrag that only matches particular address space. 7457 PatFrag IntrFrag = PatFrag<PFOperands, 7458 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), 7459 !cond(!eq(Space, ".shared"): AS_match.shared, 7460 !eq(Space, ".global"): AS_match.global, 7461 1: AS_match.generic)>; 7462 // Build AS-constrained pattern. 7463 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 7464 7465 let OutOperandList = Frag.Outs; 7466 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 7467 let AsmString = "wmma.load." 7468 # Frag.frag 7469 # ".sync" 7470 # "${ptx:aligned}" 7471 # "." # Layout 7472 # "." # Frag.geom 7473 # Space 7474 # "." # Frag.ptx_elt_type # " \t" 7475 # Frag.regstring 7476 # ", [$src]" 7477 # !if(WithStride, ", $ldm", "") 7478 # ";"; 7479} 7480 7481// 7482// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7483// 7484class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, 7485 bit WithStride, DAGOperand DstOp> 7486 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record, 7487 [!con((ins DstOp:$dst), 7488 Frag.Ins, 7489 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 7490 Requires<Frag.Predicates> { 7491 7492 // Load/store intrinsics are overloaded on pointer's address space. 7493 // To match the right intrinsic, we need to build AS-constrained PatFrag. 7494 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 7495 dag PFOperands = !con((ops node:$dst), 7496 !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names), 7497 !if(WithStride, (ops node:$ldm), (ops))); 7498 // Build PatFrag that only matches particular address space. 7499 PatFrag IntrFrag = PatFrag<PFOperands, 7500 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), 7501 !cond(!eq(Space, ".shared"): AS_match.shared, 7502 !eq(Space, ".global"): AS_match.global, 7503 1: AS_match.generic)>; 7504 // Build AS-constrained pattern. 7505 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 7506 7507 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 7508 let OutOperandList = (outs); 7509 let AsmString = "wmma.store.d.sync" 7510 # "${ptx:aligned}" 7511 # "." # Layout 7512 # "." # Frag.geom 7513 # Space 7514 # "." # Frag.ptx_elt_type 7515 # " \t[$dst]," 7516 # Frag.regstring 7517 # !if(WithStride, ", $ldm", "") 7518 # ";"; 7519} 7520 7521// Create all load/store variants 7522defset list<WMMA_INSTR> MMA_LDSTs = { 7523 foreach layout = ["row", "col"] in { 7524 foreach stride = [0, 1] in { 7525 foreach space = [".global", ".shared", ""] in { 7526 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { 7527 foreach frag = NVVM_MMA_OPS.all_ld_ops in 7528 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in 7529 def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride, addr>; 7530 foreach frag = NVVM_MMA_OPS.all_st_ops in 7531 foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in 7532 def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride, addr>; 7533 } // addr 7534 } // space 7535 } // stride 7536 } // layout 7537} // defset 7538 7539// WMMA.MMA 7540class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB, 7541 WMMA_REGINFO FragC, WMMA_REGINFO FragD, 7542 string ALayout, string BLayout, int Satfinite> 7543 : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite, FragA, FragB, FragC, FragD>.record, 7544 [FragA.Ins, FragB.Ins, FragC.Ins]>, 7545 // Requires does not seem to have effect on Instruction w/o Patterns. 7546 // We set it here anyways and propagate to the Pat<> we construct below. 7547 Requires<FragA.Predicates> { 7548 let OutOperandList = FragD.Outs; 7549 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 7550 string TypeList = !cond( 7551 !eq(FragD.geom, "m8n8k4") : "." # FragD.ptx_elt_type 7552 # ".f16.f16." 7553 # FragC.ptx_elt_type, 7554 !eq(FragD.ptx_elt_type, "s32") : ".s32" 7555 # "." # FragA.ptx_elt_type 7556 # "." # FragB.ptx_elt_type 7557 # ".s32", 7558 1: "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type, 7559 ); 7560 let AsmString = !if(!eq(FragA.geom, "m8n8k4"), 7561 "mma.sync.aligned.m8n8k4" 7562 # "." # ALayout 7563 # "." # BLayout 7564 # TypeList # "\n\t\t" 7565 # FragD.regstring # ",\n\t\t" 7566 # FragA.regstring # ",\n\t\t" 7567 # FragB.regstring # ",\n\t\t" 7568 # FragC.regstring # ";", 7569 "wmma.mma" 7570 # !if(!eq(FragA.ptx_elt_type, "b1"), ".xor.popc", "") 7571 # ".sync" 7572 # "${ptx:aligned}" 7573 # "." # ALayout 7574 # "." # BLayout 7575 # "." # FragA.geom 7576 # TypeList 7577 # !if(Satfinite, ".satfinite", "") # "\n\t\t" 7578 # FragD.regstring # ",\n\t\t" 7579 # FragA.regstring # ",\n\t\t" 7580 # FragB.regstring # ",\n\t\t" 7581 # FragC.regstring # ";"); 7582} 7583 7584defset list<WMMA_INSTR> MMAs = { 7585 foreach layout_a = ["row", "col"] in { 7586 foreach layout_b = ["row", "col"] in { 7587 foreach satf = [0, 1] in { 7588 foreach op = NVVM_MMA_OPS.all_mma_ops in { 7589 foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in { 7590 def : WMMA_MMA<WMMA_REGINFO<op[0]>, 7591 WMMA_REGINFO<op[1]>, 7592 WMMA_REGINFO<op[2]>, 7593 WMMA_REGINFO<op[3]>, 7594 layout_a, layout_b, satf>; 7595 } 7596 } // op 7597 } // satf 7598 } // layout_b 7599 } // layout_a 7600} // defset 7601 7602 7603// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a 7604// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with 7605// the instruction record. 7606class WMMA_PAT<WMMA_INSTR wi> 7607 : Pat<wi.IntrinsicPattern, 7608 !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)), 7609 (wi ptx.version))>, 7610 Requires<wi.Predicates>; 7611 7612// Build intrinsic->instruction patterns for all MMA instructions. 7613foreach mma = !listconcat(MMAs, MMA_LDSTs) in 7614 def : WMMA_PAT<mma>; 7615