//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves.  Each predicate converts the node's APFloat
// to a host float/double and compares it with ==, so a negative zero also
// satisfies the "0" leaves.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;

// Predicate bodies that test the address space of a memory SDNode via
// ChkMemSDNodeAddressSpace; one code fragment per address space.
def AS_match {
  code generic = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
    return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  // Produces an i32 immediate holding Subtarget->getPTXVersion().
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates a list of n sequential register names by recursing on n-1 and
// appending prefix#(n-1); n == 0 yields the empty list.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2"]
class RegSeq<int n, string prefix> {
  list<string> ret = !if(n,
                         !listconcat(RegSeq<!add(n, -1), prefix>.ret,
                                     [prefix # !add(n, -1)]),
                         []);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
let isConvergent = 1 in {

// bar.sync on barrier 0, on a register-specified barrier, and on a
// register-specified barrier with a second register operand.
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
  "bar.sync \t0;",
  [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
  "bar.sync \t$src1;",
  [(int_nvvm_barrier_n Int32Regs:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
  "bar.sync \t$src1, $src2;",
  [(int_nvvm_barrier Int32Regs:$src1, Int32Regs:$src2)]>;

// Reducing barriers on barrier 0.  The i32 input is first turned into a
// predicate with setp.ne.u32 inside a local {{ }} scope; the and/or forms
// turn the resulting predicate back into 0/1 with selp.u32.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t" #
  ".reg .pred \t%p1; \n\t" #
  "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
  "bar.red.popc.u32 \t$dst, 0, %p1; \n\t" #
  "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_popc Int32Regs:$pred))]>;
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t" #
  ".reg .pred \t%p1; \n\t" #
  ".reg .pred \t%p2; \n\t" #
  "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
  "bar.red.and.pred \t%p2, 0, %p1; \n\t" #
  "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
  "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_and Int32Regs:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  "{{ \n\t" #
  ".reg .pred \t%p1; \n\t" #
  ".reg .pred \t%p2; \n\t" #
  "setp.ne.u32 \t%p1, $pred, 0; \n\t" #
  "bar.red.or.pred \t%p2, 0, %p1; \n\t" #
  "selp.u32 \t$dst, 1, 0, %p2; \n\t" #
  "}}",
  [(set Int32Regs:$dst, (int_nvvm_barrier0_or Int32Regs:$pred))]>;

// bar.sync with an immediate barrier id.
def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i),
  "bar.sync \t$i;",
  [(int_nvvm_bar_sync imm:$i)]>;

// bar.warp.sync, immediate (_I) and register (_R) operand forms.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i),
  "bar.warp.sync \t$i;",
  [(int_nvvm_bar_warp_sync imm:$i)]>,
  Requires<[hasPTX60, hasSM30]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i),
  "bar.warp.sync \t$i;",
  [(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
  Requires<[hasPTX60, hasSM30]>;

// barrier.sync with only a barrier id, immediate (_I) and register (_R).
def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i),
  "barrier.sync \t$i;",
  [(int_nvvm_barrier_sync imm:$i)]>,
  Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i),
  "barrier.sync \t$i;",
  [(int_nvvm_barrier_sync Int32Regs:$i)]>,
  Requires<[hasPTX60, hasSM30]>;

// barrier.sync with id and count.  The two-letter suffix gives the operand
// kind of ($id, $cnt): R = register, I = immediate.
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs),
  (ins Int32Regs:$id, Int32Regs:$cnt),
  "barrier.sync \t$id, $cnt;",
  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs),
  (ins Int32Regs:$id, i32imm:$cnt),
  "barrier.sync \t$id, $cnt;",
  [(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
  Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs),
  (ins i32imm:$id, Int32Regs:$cnt),
  "barrier.sync \t$id, $cnt;",
  [(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
  Requires<[hasPTX60, hasSM30]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs),
  (ins i32imm:$id, i32imm:$cnt),
  "barrier.sync \t$id, $cnt;",
  [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
  Requires<[hasPTX60, hasSM30]>;


// shfl.{up,down,bfly,idx}.b32
multiclass SHFL<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // The last two parameters to shfl can be regs or imms.  ptxas is smart
  // enough to inline constant registers, so strictly speaking we don't need to
  // handle immediates here.  But it's easy enough, and it makes our ptx more
  // readable.

  // $offset and $mask both in registers.
  def reg : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, Int32Regs:$mask))]>;

  // Immediate $offset, register $mask.
  def imm1 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, Int32Regs:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, Int32Regs:$mask))]>;

  // Register $offset, immediate $mask.
  def imm2 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, Int32Regs:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, Int32Regs:$offset, imm:$mask))]>;

  // $offset and $mask both immediate.
  def imm3 : NVPTXInst<
      (outs regclass:$dst),
      (ins regclass:$src, i32imm:$offset, i32imm:$mask),
      "shfl." # mode # ".b32 $dst, $src, $offset, $mask;",
      [(set regclass:$dst,
            (IntOp regclass:$src, imm:$offset, imm:$mask))]>;
}

defm INT_SHFL_DOWN_I32 : SHFL<Int32Regs, "down", int_nvvm_shfl_down_i32>;
defm INT_SHFL_DOWN_F32 : SHFL<Float32Regs, "down", int_nvvm_shfl_down_f32>;
defm INT_SHFL_UP_I32 : SHFL<Int32Regs, "up", int_nvvm_shfl_up_i32>;
defm INT_SHFL_UP_F32 : SHFL<Float32Regs, "up", int_nvvm_shfl_up_f32>;
defm INT_SHFL_BFLY_I32 : SHFL<Int32Regs, "bfly", int_nvvm_shfl_bfly_i32>;
defm INT_SHFL_BFLY_F32 : SHFL<Float32Regs, "bfly", int_nvvm_shfl_bfly_f32>;
defm INT_SHFL_IDX_I32 : SHFL<Int32Regs, "idx", int_nvvm_shfl_idx_i32>;
defm INT_SHFL_IDX_F32 : SHFL<Float32Regs, "idx", int_nvvm_shfl_idx_f32>;

multiclass SHFL_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  // Threadmask and the last two parameters to shfl.sync can be regs or imms.
  // ptxas is smart enough to inline constant registers, so strictly speaking
  // we don't need to handle immediates here.  But it's easy enough, and it
  // makes our ptx more readable.
  //
  // The three-letter def suffix gives the operand kind of
  // ($threadmask, $offset, $mask): r = register, i = immediate.  Note that
  // $threadmask is the first intrinsic operand but the last asm operand.
  def rrr : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
                                  Int32Regs:$offset, Int32Regs:$mask))]>;

  def rri : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
                                  Int32Regs:$offset, imm:$mask))]>;

  def rir : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
                                  imm:$offset, Int32Regs:$mask))]>;

  def rii : NVPTXInst<
      (outs regclass:$dst),
      (ins Int32Regs:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp Int32Regs:$threadmask, regclass:$src,
                                  imm:$offset, imm:$mask))]>;

  def irr : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
                                  Int32Regs:$offset, Int32Regs:$mask))]>;

  def iri : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           Int32Regs:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
                                  Int32Regs:$offset, imm:$mask))]>;

  def iir : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, Int32Regs:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
                                  imm:$offset, Int32Regs:$mask))]>;

  def iii : NVPTXInst<
      (outs regclass:$dst),
      (ins i32imm:$threadmask, regclass:$src,
           i32imm:$offset, i32imm:$mask),
      "shfl.sync." # mode # ".b32 $dst, $src, $offset, $mask, $threadmask;",
      [(set regclass:$dst, (IntOp imm:$threadmask, regclass:$src,
                                  imm:$offset, imm:$mask))]>;
}

// On sm_70 these don't have to be convergent, so we may eventually want to
// implement non-convergent variant of this intrinsic.
defm INT_SHFL_SYNC_DOWN_I32 : SHFL_SYNC<Int32Regs, "down", int_nvvm_shfl_sync_down_i32>;
defm INT_SHFL_SYNC_DOWN_F32 : SHFL_SYNC<Float32Regs, "down", int_nvvm_shfl_sync_down_f32>;
defm INT_SHFL_SYNC_UP_I32 : SHFL_SYNC<Int32Regs, "up", int_nvvm_shfl_sync_up_i32>;
defm INT_SHFL_SYNC_UP_F32 : SHFL_SYNC<Float32Regs, "up", int_nvvm_shfl_sync_up_f32>;
defm INT_SHFL_SYNC_BFLY_I32 : SHFL_SYNC<Int32Regs, "bfly", int_nvvm_shfl_sync_bfly_i32>;
defm INT_SHFL_SYNC_BFLY_F32 : SHFL_SYNC<Float32Regs, "bfly", int_nvvm_shfl_sync_bfly_f32>;
defm INT_SHFL_SYNC_IDX_I32 : SHFL_SYNC<Int32Regs, "idx", int_nvvm_shfl_sync_idx_i32>;
defm INT_SHFL_SYNC_IDX_F32 : SHFL_SYNC<Float32Regs, "idx", int_nvvm_shfl_sync_idx_f32>;


// vote.{all,any,uni,ballot}
// `mode` carries both the vote kind and the result type suffix
// (e.g. "all.pred", "ballot.b32").
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
                  !strconcat("vote.", mode, " \t$dest, $pred;"),
                  [(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
        Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// `i` takes the member mask as an immediate, `r` takes it in a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
                    !strconcat("vote.sync.", mode, " \t$dest, $pred, $mask;"),
                    [(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
          Requires<[hasPTX60, hasSM30]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// match.any.sync.{b32,b64}
// The two-letter def suffix gives the operand kind of ($value, $mask), i.e.
// in asm operand order: r = register, i = immediate.
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.any.sync.", ptxtype,
                                " \t$dest, $value, $mask;"),
                     [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync.{b32,b64} with the additional predicate result, printed as
// "$dest|$pred".  Def suffixes follow the same ($value, $mask) convention as
// MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                           Operand ImmOp> {
  def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, imm:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
  def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
                     !strconcat("match.all.sync.", ptxtype,
                                " \t$dest|$pred, $value, $mask;"),
                     [(set regclass:$dest, Int1Regs:$pred,
                           (IntOp Int32Regs:$mask, regclass:$value))]>,
           Requires<[hasPTX60, hasSM70]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                          i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                          i64imm>;

} // isConvergent = 1

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------

// An operand-less instruction whose asm text is StrOp and which is selected
// for the operand-less intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP>
  : NVPTXInst<(outs), (ins), StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;


//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x)
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x) because when x
// is NaN, max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.

// f32: every operand order of the outer fmin-with-1.0 and the inner
// fmax-with-0.0 is matched and selected as a saturating f32->f32 CVT.
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f immFloat0, Float32Regs:$a)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1,
                           (int_nvvm_fmax_f Float32Regs:$a, immFloat0)),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, Float32Regs:$a),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f Float32Regs:$a, immFloat0),
                           immFloat1),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;

// f64: the same four operand orders, selected as a saturating f64->f64 CVT.
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d immDouble0, Float64Regs:$a)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1,
                           (int_nvvm_fmax_d Float64Regs:$a, immDouble0)),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, Float64Regs:$a),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d Float64Regs:$a, immDouble0),
                           immDouble1),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;


// One-source instruction: $dst = IntOP($src0).
// We need a full string for OpcStr here because we need to deal with case like
// INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs target_regclass:$dst),
              (ins src_regclass:$src0),
              OpcStr,
              [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>;

// We need a full string for OpcStr here because we need to deal with the case
// like INT_PTX_NATIVE_POWR_F.
// Two-source instruction: $dst = IntOP($src0, $src1).  OpcStr is the complete
// asm string, including the operand list.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1))]>;

// Three-source instruction: $dst = IntOP($src0, $src1, $src2).
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP>
  : NVPTXInst<(outs t_regclass:$dst),
              (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
              OpcStr,
              [(set t_regclass:$dst,
                    (IntOP s0_regclass:$src0, s1_regclass:$src1,
                           s2_regclass:$src2))]>;

//
// MISC
//

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

//
// Min Max
//

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;


//
// Multiplication
//

// High half of the 32-bit product, signed and unsigned.
def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;

// High half of the 64-bit product, signed and unsigned.
def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
  Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
  Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

// f32 multiply, one def per rounding mode, with and without .ftz.
def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

// f64 multiply, one def per rounding mode.
def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

// mul24.lo, signed and unsigned.
def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
  Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

// Approximate f32 divide.
def INT_NVVM_DIV_APPROX_FTZ_F : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

// f32 divide, one def per rounding mode, with and without .ftz.
def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

// f64 divide, one def per rounding mode.
def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

//
// Sad
//

def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
  Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;

//
// Floor Ceil
//

// Floor is selected as a same-type CVT with the CvtRMI mode.
def : Pat<(int_nvvm_floor_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

// Ceil is selected as a same-type CVT with the CvtRPI mode.
def : Pat<(int_nvvm_ceil_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// Round
//

// Round is selected as a same-type CVT with the CvtRNI mode.
def : Pat<(int_nvvm_round_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_round_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

//
// Trunc
//

// Trunc is selected as a same-type CVT with the CvtRZI mode.
def : Pat<(int_nvvm_trunc_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;

//
// Saturate
//

// Saturate is selected as a same-type CVT with the CvtSAT mode.
def : Pat<(int_nvvm_saturate_ftz_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f Float32Regs:$a),
          (CVT_f32_f32 Float32Regs:$a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d Float64Regs:$a),
          (CVT_f64_f64 Float64Regs:$a, CvtSAT)>;

//
// Exp2 Log2
//

// NOTE(review): the PTX ISA appears to document ex2.approx / lg2.approx only
// for .f32 operands; confirm that ptxas accepts the .f64 forms emitted by the
// two *_APPROX_D defs below.
def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// f32 fused multiply-add, one def per rounding mode, with and without .ftz.
def INT_NVVM_FMA_RN_FTZ_F : F_MATH_3<"fma.rn.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_ftz_f>;
def INT_NVVM_FMA_RN_F : F_MATH_3<"fma.rn.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rn_f>;
def INT_NVVM_FMA_RZ_FTZ_F : F_MATH_3<"fma.rz.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_ftz_f>;
def INT_NVVM_FMA_RZ_F : F_MATH_3<"fma.rz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rz_f>;
def INT_NVVM_FMA_RM_FTZ_F : F_MATH_3<"fma.rm.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_ftz_f>;
def INT_NVVM_FMA_RM_F : F_MATH_3<"fma.rm.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rm_f>;
def INT_NVVM_FMA_RP_FTZ_F : F_MATH_3<"fma.rp.ftz.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_ftz_f>;
def INT_NVVM_FMA_RP_F : F_MATH_3<"fma.rp.f32 \t$dst, $src0, $src1, $src2;",
  Float32Regs, Float32Regs, Float32Regs, Float32Regs, int_nvvm_fma_rp_f>;

// f64 fused multiply-add, one def per rounding mode.
def INT_NVVM_FMA_RN_D : F_MATH_3<"fma.rn.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rn_d>;
def INT_NVVM_FMA_RZ_D : F_MATH_3<"fma.rz.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rz_d>;
def INT_NVVM_FMA_RM_D : F_MATH_3<"fma.rm.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rm_d>;
def INT_NVVM_FMA_RP_D : F_MATH_3<"fma.rp.f64 \t$dst, $src0, $src1, $src2;",
  Float64Regs, Float64Regs, Float64Regs, Float64Regs, int_nvvm_fma_rp_d>;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
// f32 reciprocal, remaining rounding-mode variants.
def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

// f64 reciprocal, one def per rounding mode.
def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

// Approximate f64 reciprocal.
def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

// f32 square root: four rounding modes plus approx, with and without .ftz.
def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

// f64 square root, one def per rounding mode.
def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic
// The generic int_nvvm_sqrt_f is mapped onto one of the concrete f32 sqrt
// instructions above.  The patterns are listed from the most predicates
// (doF32FTZ and do_SQRTF32_RN) down to the unpredicated approx fallback.
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_RN_F Float32Regs:$a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_FTZ_F Float32Regs:$a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f Float32Regs:$a),
          (INT_NVVM_SQRT_APPROX_F Float32Regs:$a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
  Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
  Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

//
// Add
//

// f32 add, one def per rounding mode, with and without .ftz.
def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
def INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>;
def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>;
def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;",
  Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>;

// f64 add, one def per rounding mode.
def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>;
def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>;
def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>;
def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;",
  Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>;

//
// Convert
//

// f64 -> f32, one pattern per rounding mode, with and without FTZ.
def : Pat<(int_nvvm_d2f_rn_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_d2f_rn Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_d2f_rz_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ_FTZ)>;
def : Pat<(int_nvvm_d2f_rz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_d2f_rm_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM_FTZ)>;
def : Pat<(int_nvvm_d2f_rm Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_d2f_rp_ftz Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP_FTZ)>;
def : Pat<(int_nvvm_d2f_rp Float64Regs:$a),
          (CVT_f32_f64 Float64Regs:$a, CvtRP)>;

// f64 -> s32, using the integer-rounding (…I) conversion modes.
def : Pat<(int_nvvm_d2i_rn Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2i_rz Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2i_rm Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2i_rp Float64Regs:$a),
          (CVT_s32_f64 Float64Regs:$a, CvtRPI)>;

// f64 -> u32, using the integer-rounding (…I) conversion modes.
def : Pat<(int_nvvm_d2ui_rn Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ui_rz Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ui_rm Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ui_rp Float64Regs:$a),
          (CVT_u32_f64 Float64Regs:$a, CvtRPI)>;

// s32 -> f64.
def : Pat<(int_nvvm_i2d_rn Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2d_rz Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2d_rm Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2d_rp Int32Regs:$a),
          (CVT_f64_s32 Int32Regs:$a, CvtRP)>;

// u32 -> f64.
def : Pat<(int_nvvm_ui2d_rn Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2d_rz Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2d_rm Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRM)>;
// int_nvvm_ui2d_rp -> CVT_f64_u32 with the CvtRP mode operand.
def : Pat<(int_nvvm_ui2d_rp Int32Regs:$a),
          (CVT_f64_u32 Int32Regs:$a, CvtRP)>;

// int_nvvm_f2i_* -> CVT_s32_f32
def : Pat<(int_nvvm_f2i_rn_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2i_rn Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp Float32Regs:$a),
          (CVT_s32_f32 Float32Regs:$a, CvtRPI)>;

// int_nvvm_f2ui_* -> CVT_u32_f32
def : Pat<(int_nvvm_f2ui_rn_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp Float32Regs:$a),
          (CVT_u32_f32 Float32Regs:$a, CvtRPI)>;

// int_nvvm_i2f_* -> CVT_f32_s32
def : Pat<(int_nvvm_i2f_rn Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp Int32Regs:$a),
          (CVT_f32_s32 Int32Regs:$a, CvtRP)>;

// int_nvvm_ui2f_* -> CVT_f32_u32
def : Pat<(int_nvvm_ui2f_rn Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp Int32Regs:$a),
          (CVT_f32_u32 Int32Regs:$a, CvtRP)>;

// Build a 64-bit register value from two 32-bit registers with one mov.b64.
def INT_NVVM_LOHI_I2D : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
    Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Split a 64-bit register value with mov.b64 into a register pair; $dst is
// the first element for _LO and the second for _HI, and the other element
// goes to a scratch %temp declared inside the emitted { } scope.
// NOTE(review): INT_NVVM_LOHI_I2D writes its register pair with doubled
// braces ({{ }}) while these two use single braces around the pair; confirm
// the printed PTX is what is intended.
def INT_NVVM_D2I_LO : F_MATH_1<
    "{{\n\t" #
    ".reg .b32 %temp; \n\t" #
    "mov.b64 \t{$dst, %temp}, $src0;\n\t" #
    "}}",
    Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
def INT_NVVM_D2I_HI : F_MATH_1<
    "{{\n\t" #
    ".reg .b32 %temp; \n\t" #
    "mov.b64 \t{%temp, $dst}, $src0;\n\t" #
    "}}",
    Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// int_nvvm_f2ll_* -> CVT_s64_f32
def : Pat<(int_nvvm_f2ll_rn_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp Float32Regs:$a),
          (CVT_s64_f32 Float32Regs:$a, CvtRPI)>;

// int_nvvm_f2ull_* -> CVT_u64_f32
def : Pat<(int_nvvm_f2ull_rn_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp Float32Regs:$a),
          (CVT_u64_f32 Float32Regs:$a, CvtRPI)>;

// int_nvvm_d2ll_* -> CVT_s64_f64
def : Pat<(int_nvvm_d2ll_rn Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp Float64Regs:$a),
          (CVT_s64_f64 Float64Regs:$a, CvtRPI)>;

// int_nvvm_d2ull_* -> CVT_u64_f64
def : Pat<(int_nvvm_d2ull_rn Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp Float64Regs:$a),
          (CVT_u64_f64 Float64Regs:$a, CvtRPI)>;

// int_nvvm_ll2f_* -> CVT_f32_s64
def : Pat<(int_nvvm_ll2f_rn Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp Int64Regs:$a),
          (CVT_f32_s64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ull2f_* -> CVT_f32_u64
def : Pat<(int_nvvm_ull2f_rn Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp Int64Regs:$a),
          (CVT_f32_u64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ll2d_* -> CVT_f64_s64
def : Pat<(int_nvvm_ll2d_rn Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp Int64Regs:$a),
          (CVT_f64_s64 Int64Regs:$a, CvtRP)>;

// int_nvvm_ull2d_* -> CVT_f64_u64
def : Pat<(int_nvvm_ull2d_rn Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
          (CVT_f64_u64 Int64Regs:$a, CvtRP)>;


// int_nvvm_f2h_*: convert with CVT_f16_f32, then pass the result through
// BITCONVERT_16_F2I.
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
          (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;

//
// Bitcast
//

// Same-width moves between integer and floating-point register classes.
def INT_NVVM_BITCAST_F2I : F_MATH_1<"mov.b32 \t$dst, $src0;",
    Int32Regs, Float32Regs, int_nvvm_bitcast_f2i>;
def INT_NVVM_BITCAST_I2F : F_MATH_1<"mov.b32 \t$dst, $src0;",
    Float32Regs, Int32Regs, int_nvvm_bitcast_i2f>;

def INT_NVVM_BITCAST_LL2D : F_MATH_1<"mov.b64 \t$dst, $src0;",
    Float64Regs, Int64Regs, int_nvvm_bitcast_ll2d>;
def INT_NVVM_BITCAST_D2LL : F_MATH_1<"mov.b64 \t$dst, $src0;",
    Int64Regs, Float64Regs, int_nvvm_bitcast_d2ll>;

//
// FNS
//

// Base class for every fns.b32 form. `ins` is the input operand dag and
// `Operands` is the int_nvvm_fns pattern whose result is stored in $dst.
// All forms are gated on hasPTX60 and hasSM30.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set Int32Regs:$dst, Operands )]>,
      Requires<[hasPTX60, hasSM30]>;

// The three-letter suffix gives the kind of $mask, $base and $offset in that
// order: r = Int32Regs register, i = i32imm immediate.
def INT_FNS_rrr : INT_FNS_MBO<
    (ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_rri : INT_FNS_MBO<
    (ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
    (int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_rir : INT_FNS_MBO<
    (ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, Int32Regs:$offset)>;
// Remaining fns.b32 operand combinations (r = Int32Regs register, i = i32imm
// immediate, in $mask / $base / $offset order).
def INT_FNS_rii : INT_FNS_MBO<
    (ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns Int32Regs:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, Int32Regs:$offset)>;
def INT_FNS_iri : INT_FNS_MBO<
    (ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, Int32Regs:$base, imm:$offset)>;
def INT_FNS_iir : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, Int32Regs:$offset)>;
def INT_FNS_iii : INT_FNS_MBO<
    (ins i32imm:$mask, i32imm:$base, i32imm:$offset),
    (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic node.
class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK <dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic (address + one value) for a single pointer register
// class. Defines a `reg` form taking $b in `regclass` and an `imm` form taking
// $b as `IMMType`, matched through the `IMM` node. The mnemonic is
// "atom" + SpaceStr + OpcStr + TypeStr; both forms are gated on `Pred`.
multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
    Operand IMMType, SDNode IMM, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<Pred>;
  def imm : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, IMMType:$b),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, IMM:$b))]>,
    Requires<Pred>;
}

// Instantiates F_ATOMIC_2_imp twice: p32 with Int32Regs addresses and p64 with
// Int64Regs addresses. `Pred` defaults to the empty list.
multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
    string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
    list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted sequence declares a scoped `temp` register, negates $b into it
// with neg.s<TypeStr>, then issues the atomic on `temp` typed as .u<TypeStr>.
// Only a `reg` form is defined.
// NOTE(review): IMMType is accepted but never referenced in this body.
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
    Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b),
      "{{ \n\t" #
      ".reg \t.s" # TypeStr # " temp; \n\t" #
      "neg.s" # TypeStr # " \ttemp, $b; \n\t" #
      "atom" # SpaceStr # OpcStr # ".u" # TypeStr # " \t$dst, [$addr], temp; \n\t" #
      "}}",
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b))]>,
    Requires<Pred>;
}

// p32 / p64 address-width instantiations of F_ATOMIC_2_NEG_imp.
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
    string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
    list<Predicate> Pred = []> {
  defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                               IntOp, IMMType, Pred> ;
  defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                               IntOp, IMMType, Pred> ;
}

// has 3 operands
// Address plus two values ($b, $c). Four forms are defined:
//   reg  - $b and $c both in registers
//   imm1 - $b immediate, $c register
//   imm2 - $b register,  $c immediate
//   imm3 - $b and $c both immediate
// Immediates are matched with the `imm` node in every form.
multiclass F_ATOMIC_3_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
    string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
    Operand IMMType, list<Predicate> Pred> {
  def reg : NVPTXInst<(outs regclass:$dst),
                      (ins ptrclass:$addr, regclass:$b, regclass:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, regclass:$c))]>,
    Requires<Pred>;

  def imm1 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, regclass:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, regclass:$c))]>,
    Requires<Pred>;

  def imm2 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, regclass:$b, IMMType:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, regclass:$b, imm:$c))]>,
    Requires<Pred>;

  def imm3 : NVPTXInst<(outs regclass:$dst),
                       (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;",
      [(set regclass:$dst, (IntOp ptrclass:$addr, imm:$b, imm:$c))]>,
    Requires<Pred>;
}

// p32 / p64 address-width instantiations of F_ATOMIC_3_imp.
multiclass F_ATOMIC_3<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
    string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
                            IntOp, IMMType, Pred>;
}

// Naming used for the PatFrags below: _g = global, _s = shared, _gen = generic
// address-space check.
// NOTE(review): every *_GEN_*_USE_G defm in this section matches the same
// generic-address-space PatFrag as its plain *_GEN_* sibling but prints a
// ".global" opcode, and no predicate separates the two here; confirm how the
// selector is meant to choose between them.

// atom_add

def atomic_load_add_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_add_32 node:$a, node:$b)>;
def atomic_load_add_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_add_64 node:$a, node:$b)>;
def atomic_load_add_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_add_64 node:$a, node:$b)>;
// Floating-point add fragments; these wrap atomic_load_fadd and are shared by
// the F32 and F64 defms below.
def atomic_load_add_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_fadd node:$a, node:$b)>;
def atomic_load_add_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_fadd node:$a, node:$b)>;

defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
    atomic_load_add_32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".add",
    atomic_load_add_32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".add",
    atomic_load_add_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    ".add", atomic_load_add_32_gen, i32imm, imm>;

defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", ".add",
    atomic_load_add_64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", ".add",
    atomic_load_add_64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".add",
    atomic_load_add_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".u64",
    ".add", atomic_load_add_64_gen, i64imm, imm>;

defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<Float32Regs, ".global", ".f32", ".add",
    atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
    atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
    atomic_load_add_gen, f32imm, fpimm>;

// The f64 forms are additionally gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
    atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
    atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
    atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub
// Implemented through F_ATOMIC_2_NEG with the ".add" opcode, i.e. the operand
// is negated and then added.

def atomic_load_sub_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_32 node:$a, node:$b)>;
def atomic_load_sub_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_64 node:$a, node:$b)>;
def atomic_load_sub_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_sub_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
    atomic_load_sub_32_g, i32imm>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
    atomic_load_sub_64_g, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
    atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
    ".add", atomic_load_sub_32_gen, i32imm>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
    atomic_load_sub_32_s, i32imm>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
    atomic_load_sub_64_s, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
    atomic_load_sub_64_gen, i64imm>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
    ".add", atomic_load_sub_64_gen, i64imm>;

// atom_swap

def atomic_swap_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_swap_32 node:$a, node:$b)>;
def atomic_swap_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_swap_64 node:$a, node:$b)>;
def atomic_swap_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_swap_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".exch",
    atomic_swap_32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".exch",
    atomic_swap_32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".exch",
    atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
    ".exch", atomic_swap_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".exch",
    atomic_swap_64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".exch",
    atomic_swap_64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".exch",
    atomic_swap_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
    ".exch", atomic_swap_64_gen, i64imm, imm>;

// atom_max
// The max fragments are emitted with .s32/.s64 types and the umax fragments
// with .u32/.u64.

def atomic_load_max_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_max_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_64 node:$a, node:$b)>;
def atomic_load_umax_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_umax_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
    ".max", atomic_load_max_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
    ".max", atomic_load_max_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
    atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
    ".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
    ".max", atomic_load_max_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
    ".max", atomic_load_max_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
    atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
    ".s64", ".max", atomic_load_max_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    ".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
    ".max", atomic_load_umax_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
    atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
    ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
    ".max", atomic_load_umax_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
    ".max", atomic_load_umax_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
    atomic_load_umax_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
    ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm>;

// atom_min
// Same layout as atom_max: min uses .s32/.s64, umin uses .u32/.u64.

def atomic_load_min_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_min_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_64 node:$a, node:$b)>;
def atomic_load_umin_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_umin_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
    ".min", atomic_load_min_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".s32",
    ".min", atomic_load_min_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
    atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
    ".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
    ".min", atomic_load_min_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
    ".min", atomic_load_min_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
    atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
    ".s64", ".min", atomic_load_min_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    ".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
    ".min", atomic_load_umin_32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
    atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
    ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
    ".min", atomic_load_umin_64_g, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
    ".min", atomic_load_umin_64_s, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
    atomic_load_umin_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
    ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm>;

// atom_inc atom_dec
// These fragments wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics; only
// 32-bit forms are defined here.

def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_inc_32 node:$a, node:$b)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (int_nvvm_atomic_load_dec_32 node:$a, node:$b)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".inc",
    atomic_load_inc_32_g, i32imm, imm>;
defm INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".inc",
    atomic_load_inc_32_s, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".inc",
    atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    ".inc", atomic_load_inc_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".dec",
    atomic_load_dec_32_g, i32imm, imm>;
defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", ".dec",
    atomic_load_dec_32_s, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".dec",
    atomic_load_dec_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32",
    ".dec", atomic_load_dec_32_gen, i32imm, imm>;

// atom_and

def atomic_load_and_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_and_64 node:$a, node:$b)>;
def atomic_load_and_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_and_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
    atomic_load_and_32_g, i32imm, imm>;
defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".and",
    atomic_load_and_32_s, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
    atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
    ".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
    atomic_load_and_64_g, i64imm, imm>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
    atomic_load_and_64_s, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
    atomic_load_and_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
    ".and", atomic_load_and_64_gen, i64imm, imm>;

// atom_or

def atomic_load_or_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_or_64 node:$a, node:$b)>;
def atomic_load_or_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_or_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
    atomic_load_or_32_g, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".or",
    atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
    ".or", atomic_load_or_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
    atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
    atomic_load_or_64_g, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
    atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
    ".or", atomic_load_or_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
    atomic_load_or_64_s, i64imm, imm>;

// atom_xor

def atomic_load_xor_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_64_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_64 node:$a, node:$b)>;
def atomic_load_xor_64_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
    (atomic_load_xor_64 node:$a, node:$b)>;

defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
    atomic_load_xor_32_g, i32imm, imm>;
defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".xor",
    atomic_load_xor_32_s, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
    atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
    ".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
    atomic_load_xor_64_g, i64imm, imm>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
    atomic_load_xor_64_s, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
    atomic_load_xor_64_gen, i64imm, imm>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
    ".xor", atomic_load_xor_64_gen, i64imm, imm>;

// atom_cas

def atomic_cmp_swap_32_g : ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_s : ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_32_gen : ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c),
    (atomic_cmp_swap_32 node:$a, node:$b, node:$c)>;
def atomic_cmp_swap_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c),
  (atomic_cmp_swap_64 node:$a,
node:$b, node:$c)>; 1498def atomic_cmp_swap_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 1499 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1500def atomic_cmp_swap_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 1501 (atomic_cmp_swap_64 node:$a, node:$b, node:$c)>; 1502 1503defm INT_PTX_ATOM_CAS_G_32 : F_ATOMIC_3<Int32Regs, ".global", ".b32", ".cas", 1504 atomic_cmp_swap_32_g, i32imm>; 1505defm INT_PTX_ATOM_CAS_S_32 : F_ATOMIC_3<Int32Regs, ".shared", ".b32", ".cas", 1506 atomic_cmp_swap_32_s, i32imm>; 1507defm INT_PTX_ATOM_CAS_GEN_32 : F_ATOMIC_3<Int32Regs, "", ".b32", ".cas", 1508 atomic_cmp_swap_32_gen, i32imm>; 1509defm INT_PTX_ATOM_CAS_GEN_32_USE_G : F_ATOMIC_3<Int32Regs, ".global", ".b32", 1510 ".cas", atomic_cmp_swap_32_gen, i32imm>; 1511defm INT_PTX_ATOM_CAS_G_64 : F_ATOMIC_3<Int64Regs, ".global", ".b64", ".cas", 1512 atomic_cmp_swap_64_g, i64imm>; 1513defm INT_PTX_ATOM_CAS_S_64 : F_ATOMIC_3<Int64Regs, ".shared", ".b64", ".cas", 1514 atomic_cmp_swap_64_s, i64imm>; 1515defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas", 1516 atomic_cmp_swap_64_gen, i64imm>; 1517defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64", 1518 ".cas", atomic_cmp_swap_64_gen, i64imm>; 1519 1520// Support for scoped atomic operations. Matches 1521// int_nvvm_atomic_{op}_{space}_{type}_{scope} 1522// and converts it into the appropriate instruction. 1523// NOTE: not all possible combinations are implemented 1524// 'space' is limited to generic as it's the only one needed to support CUDA. 1525// 'scope' = 'gpu' is default and is handled by regular atomic instructions. 1526class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds, 1527 dag ins, dag Operands> 1528 : NVPTXInst<(outs regclass:$result), ins, 1529 AsmStr, 1530 [(set regclass:$result, Operands)]>, 1531 Requires<Preds>; 1532 1533// Define instruction variants for all addressing modes. 
// Emits the anonymous two-operand scoped-atomic instructions for one
// (asm string, intrinsic) pair: 32- and 64-bit address registers, each with
// $b held either in a register of class `regclass` or as an immediate.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-operand forms carry AddedComplexity = 1; the immediate forms
  // below keep the default.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr Int32Regs:$src, regclass:$b)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr Int64Regs:$src, regclass:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction) so we have to enforce specific type on
  // immediates via explicit cast to ImmTy.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
}

// Three-operand counterpart of ATOM2P_impl: emits every register/immediate
// combination of $b and $c for 32- and 64-bit address registers.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c
  // Both operands in registers: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
  }
  // Exactly one operand is an immediate: AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
  }
  // Both operands are immediates: default complexity.
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
//
// The asm string is "atom[.<space>][.<scope>].<op>.<type> ..." where the
// space qualifier is dropped for "gen" and the scope qualifier is dropped
// for "gpu" or an empty ScopeStr. The intrinsic looked up is
// int_nvvm_atomic_<op>_<space>_<inttype>[_<scope>], with the scope suffix
// dropped only for an empty ScopeStr.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            // An empty scope must not leave a dangling "."
                            // in the mnemonic ("atom..<op>").
                            # !if(!eq(ScopeStr, "gpu"), "",
                                  !if(!eq(ScopeStr, ""), "", "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}
// Same as ATOM2N_impl, for three-operand atomics ($b and $c).
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
                            // An empty scope must not leave a dangling "."
                            // in the mnemonic ("atom..<op>").
                            # !if(!eq(ScopeStr, "gpu"), "",
                                  !if(!eq(ScopeStr, ""), "", "." # ScopeStr))
                            # "." # OpStr # "." # TypeStr
                            # " \t$result, [$src], $b, $c;",
                     !cast<Intrinsic>(
                            "int_nvvm_atomic_" # OpStr
                            # "_" # SpaceStr # "_" # IntTypeStr
                            # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
                     regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic ("gen") address space is instantiated.
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Only the generic ("gen") address space is instantiated.
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regclass, ImmType, Imm, ImmTy, Preds>;
}

// Instantiates one atomic op for each explicitly scoped variant.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // There is no ".gpu" entry: that scope is the default and the existing
  // atomics without an explicit scope already cover it.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // ".gpu"-scoped atomics are not defined here; they behave the same as the
  // regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, []>;
  defm _f32 : ATOM2S_impl<OpStr, "f", "f32",
                          Float32Regs, f32imm, fpimm, f32, []>;
  defm _f64 : ATOM2S_impl<OpStr, "f", "f64",
                          Float64Regs, f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64",
                          Int64Regs, i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32",
                          Int32Regs, i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32",
                          Int32Regs, i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64",
                          Int64Regs, i64imm, imm, i64, []>;
}

// Scoped atomic instruction families, one per PTX atom operation.
defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH: ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR  : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// ldu instructions are intentionally not marked mayLoad: the memory they
// read is read-only in a kernel.

// Scalar

// One ldu.global instruction per addressing form. TyStr supplies the type
// suffix together with the operand list; no selection pattern is attached.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8
  : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
  : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
  : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldu
//
// Two-element ldu.global, one instruction per addressing form. TyStr supplies
// the vector/type suffix and the operand list. The pattern list is empty
// (NOTE(review): selection presumably happens outside TableGen -- confirm).
// Each variant carries Requires<[hasLDU]>, the same predicate used by the
// scalar LDU_G variants.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int32Regs:$src),
                     !strconcat("ldu.global.", TyStr), []>,
                     Requires<[hasLDU]>;
 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int64Regs:$src),
                     !strconcat("ldu.global.", TyStr), []>,
                     Requires<[hasLDU]>;
 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri:$src),
                     !strconcat("ldu.global.", TyStr), []>,
                     Requires<[hasLDU]>;
 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri64:$src),
                     !strconcat("ldu.global.", TyStr), []>,
                     Requires<[hasLDU]>;
 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins imemAny:$src),
                     !strconcat("ldu.global.", TyStr), []>,
                     Requires<[hasLDU]>;
}

// Four-element counterpart of VLDU_G_ELE_V2.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins Int32Regs:$src),
               !strconcat("ldu.global.", TyStr), []>,
               Requires<[hasLDU]>;
 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins Int64Regs:$src),
               !strconcat("ldu.global.", TyStr), []>,
               Requires<[hasLDU]>;
 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins MEMri:$src),
               !strconcat("ldu.global.", TyStr), []>,
               Requires<[hasLDU]>;
 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins MEMri64:$src),
               !strconcat("ldu.global.", TyStr), []>,
               Requires<[hasLDU]>;
 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                            regclass:$dst4), (ins imemAny:$src),
               !strconcat("ldu.global.", TyStr), []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_G_v2i8_ELE
  : VLDU_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE
  : VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
  : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f16_ELE
  : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
defm INT_PTX_LDU_G_v2f16x2_ELE
  : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
  : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
  : VLDU_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE
  : VLDU_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE
  : VLDU_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE
  : VLDU_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE
  : VLDU_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
  : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Float16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
  : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Float16x2Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
  : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
    Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc, one instruction per addressing form. TyStr supplies
// the type suffix and the operand list; the pattern list is empty.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
 def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
 def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr),
                        []>, Requires<[hasLDG]>;
 def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
 def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr),
                      []>, Requires<[hasLDG]>;
 def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr),
                        []>, Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8
  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
  : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f16
  : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
defm INT_PTX_LDG_GLOBAL_f16x2
  : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
  : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
  : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32
  : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64
  : LDG_G<"u64 \t$result, [$src];", Int64Regs>;

// vector

// Elementized vector ldg
//
// Two-element ld.global.nc, one instruction per addressing form. Each
// variant carries Requires<[hasLDG]>, matching the scalar LDG_G variants
// above.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
 def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int32Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>,
                     Requires<[hasLDG]>;
 def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins Int64Regs:$src),
                     !strconcat("ld.global.nc.", TyStr), []>,
                     Requires<[hasLDG]>;
 def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri:$src),
                     !strconcat("ld.global.nc.", TyStr), []>,
                     Requires<[hasLDG]>;
 def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins MEMri64:$src),
                     !strconcat("ld.global.nc.", TyStr), []>,
                     Requires<[hasLDG]>;
 def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                     (ins imemAny:$src),
                     !strconcat("ld.global.nc.", TyStr), []>,
                     Requires<[hasLDG]>;
}

// Four-element counterpart of VLDG_G_ELE_V2.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins Int32Regs:$src),
               !strconcat("ld.global.nc.", TyStr), []>,
               Requires<[hasLDG]>;
  def _areg64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                               regclass:$dst4), (ins Int64Regs:$src),
               !strconcat("ld.global.nc.", TyStr), []>,
               Requires<[hasLDG]>;
  def _ari32: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins MEMri:$src),
               !strconcat("ld.global.nc.", TyStr), []>,
               Requires<[hasLDG]>;
  def _ari64: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                              regclass:$dst4), (ins MEMri64:$src),
               !strconcat("ld.global.nc.", TyStr), []>,
               Requires<[hasLDG]>;
  def _avar: NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3,
                             regclass:$dst4), (ins imemAny:$src),
               !strconcat("ld.global.nc.", TyStr), []>,
               Requires<[hasLDG]>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
1918defm INT_PTX_LDG_G_v2i8_ELE 1919 : VLDG_G_ELE_V2<"v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1920defm INT_PTX_LDG_G_v2i16_ELE 1921 : VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; 1922defm INT_PTX_LDG_G_v2i32_ELE 1923 : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; 1924defm INT_PTX_LDG_G_v2f16_ELE 1925 : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; 1926defm INT_PTX_LDG_G_v2f16x2_ELE 1927 : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; 1928defm INT_PTX_LDG_G_v2f32_ELE 1929 : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; 1930defm INT_PTX_LDG_G_v2i64_ELE 1931 : VLDG_G_ELE_V2<"v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>; 1932defm INT_PTX_LDG_G_v2f64_ELE 1933 : VLDG_G_ELE_V2<"v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>; 1934defm INT_PTX_LDG_G_v4i8_ELE 1935 : VLDG_G_ELE_V4<"v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1936defm INT_PTX_LDG_G_v4i16_ELE 1937 : VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; 1938defm INT_PTX_LDG_G_v4i32_ELE 1939 : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; 1940defm INT_PTX_LDG_G_v4f16_ELE 1941 : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; 1942defm INT_PTX_LDG_G_v4f16x2_ELE 1943 : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; 1944defm INT_PTX_LDG_G_v4f32_ELE 1945 : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; 1946 1947 1948multiclass NG_TO_G<string Str, Intrinsic Intrin> { 1949 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1950 !strconcat("cvta.", Str, ".u32 \t$result, $src;"), 1951 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1952 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1953 !strconcat("cvta.", Str, ".u64 \t$result, $src;"), 1954 [(set Int64Regs:$result, (Intrin 
Int64Regs:$src))]>; 1955 def _yes_6432 : NVPTXInst<(outs Int64Regs:$result), (ins Int32Regs:$src), 1956 "{{ .reg .b64 %tmp;\n\t" 1957 #" cvt.u64.u32 \t%tmp, $src;\n\t" 1958 #" cvta." # Str # ".u64 \t$result, %tmp; }}", 1959 [(set Int64Regs:$result, (Intrin Int32Regs:$src))]>, 1960 Requires<[useShortPtr]>; 1961} 1962 1963multiclass G_TO_NG<string Str, Intrinsic Intrin> { 1964 def _yes : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), 1965 !strconcat("cvta.to.", Str, ".u32 \t$result, $src;"), 1966 [(set Int32Regs:$result, (Intrin Int32Regs:$src))]>; 1967 def _yes_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), 1968 !strconcat("cvta.to.", Str, ".u64 \t$result, $src;"), 1969 [(set Int64Regs:$result, (Intrin Int64Regs:$src))]>; 1970 def _yes_3264 : NVPTXInst<(outs Int32Regs:$result), (ins Int64Regs:$src), 1971 "{{ .reg .b64 %tmp;\n\t" 1972 #" cvta.to." # Str # ".u64 \t%tmp, $src;\n\t" 1973 #" cvt.u32.u64 \t$result, %tmp; }}", 1974 [(set Int32Regs:$result, (Intrin Int64Regs:$src))]>, 1975 Requires<[useShortPtr]>; 1976} 1977 1978defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; 1979defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; 1980defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; 1981defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; 1982 1983defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; 1984defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; 1985defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; 1986defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; 1987 1988 1989// nvvm.ptr.gen.to.param 1990def nvvm_ptr_gen_to_param : NVPTXInst<(outs Int32Regs:$result), 1991 (ins Int32Regs:$src), 1992 "mov.u32 \t$result, $src;", 1993 [(set Int32Regs:$result, 1994 (int_nvvm_ptr_gen_to_param Int32Regs:$src))]>; 1995def nvvm_ptr_gen_to_param_64 : NVPTXInst<(outs Int64Regs:$result), 1996 (ins Int64Regs:$src), 1997 "mov.u64 
\t$result, $src;", 1998 [(set Int64Regs:$result, 1999 (int_nvvm_ptr_gen_to_param Int64Regs:$src))]>; 2000 2001 2002// nvvm.move intrinsicc 2003def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s), 2004 "mov.b16 \t$r, $s;", 2005 [(set Int16Regs:$r, 2006 (int_nvvm_move_i16 Int16Regs:$s))]>; 2007def nvvm_move_i32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), 2008 "mov.b32 \t$r, $s;", 2009 [(set Int32Regs:$r, 2010 (int_nvvm_move_i32 Int32Regs:$s))]>; 2011def nvvm_move_i64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), 2012 "mov.b64 \t$r, $s;", 2013 [(set Int64Regs:$r, 2014 (int_nvvm_move_i64 Int64Regs:$s))]>; 2015def nvvm_move_float : NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s), 2016 "mov.f32 \t$r, $s;", 2017 [(set Float32Regs:$r, 2018 (int_nvvm_move_float Float32Regs:$s))]>; 2019def nvvm_move_double : NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s), 2020 "mov.f64 \t$r, $s;", 2021 [(set Float64Regs:$r, 2022 (int_nvvm_move_double Float64Regs:$s))]>; 2023def nvvm_move_ptr32 : NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s), 2024 "mov.u32 \t$r, $s;", 2025 [(set Int32Regs:$r, 2026 (int_nvvm_move_ptr Int32Regs:$s))]>; 2027def nvvm_move_ptr64 : NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s), 2028 "mov.u64 \t$r, $s;", 2029 [(set Int64Regs:$r, 2030 (int_nvvm_move_ptr Int64Regs:$s))]>; 2031 2032// @TODO: Are these actually needed, or will we always just see symbols 2033// copied to registers first? 2034/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s), 2035 "mov.u32 \t$r, $s;", 2036 [(set Int32Regs:$r, 2037 (int_nvvm_move_ptr texternalsym:$s))]>; 2038def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s), 2039 "mov.u64 \t$r, $s;", 2040 [(set Int64Regs:$r, 2041 (int_nvvm_move_ptr texternalsym:$s))]>;*/ 2042 2043 2044// MoveParam %r1, param 2045// ptr_local_to_gen %r2, %r1 2046// ptr_gen_to_local %r3, %r2 2047// -> 2048// mov %r1, param 2049 2050// @TODO: Revisit this. 
There is a type 2051// contradiction between iPTRAny and iPTR for the addr defs, so the move_sym 2052// instructions are not currently defined. However, we can use the ptr 2053// variants and the asm printer will do the right thing. 2054def : Pat<(i64 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2055 (MoveParam texternalsym:$src)))), 2056 (nvvm_move_ptr64 texternalsym:$src)>; 2057def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen 2058 (MoveParam texternalsym:$src)))), 2059 (nvvm_move_ptr32 texternalsym:$src)>; 2060 2061def texsurf_handles 2062 : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), 2063 "mov.u64 \t$result, $src;", []>; 2064 2065//----------------------------------- 2066// Compiler Error Warn 2067// - Just ignore them in codegen 2068//----------------------------------- 2069 2070def INT_NVVM_COMPILER_WARN_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2071 "// llvm.nvvm.compiler.warn()", 2072 [(int_nvvm_compiler_warn Int32Regs:$a)]>; 2073def INT_NVVM_COMPILER_WARN_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2074 "// llvm.nvvm.compiler.warn()", 2075 [(int_nvvm_compiler_warn Int64Regs:$a)]>; 2076def INT_NVVM_COMPILER_ERROR_32 : NVPTXInst<(outs), (ins Int32Regs:$a), 2077 "// llvm.nvvm.compiler.error()", 2078 [(int_nvvm_compiler_error Int32Regs:$a)]>; 2079def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), 2080 "// llvm.nvvm.compiler.error()", 2081 [(int_nvvm_compiler_error Int64Regs:$a)]>; 2082 2083 2084// isspacep 2085 2086def ISSPACEP_CONST_32 2087 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2088 "isspacep.const \t$d, $a;", 2089 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>, 2090 Requires<[hasPTX31]>; 2091def ISSPACEP_CONST_64 2092 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2093 "isspacep.const \t$d, $a;", 2094 [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>, 2095 Requires<[hasPTX31]>; 2096def ISSPACEP_GLOBAL_32 2097 : NVPTXInst<(outs Int1Regs:$d), (ins 
Int32Regs:$a), 2098 "isspacep.global \t$d, $a;", 2099 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; 2100def ISSPACEP_GLOBAL_64 2101 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2102 "isspacep.global \t$d, $a;", 2103 [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; 2104def ISSPACEP_LOCAL_32 2105 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2106 "isspacep.local \t$d, $a;", 2107 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; 2108def ISSPACEP_LOCAL_64 2109 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2110 "isspacep.local \t$d, $a;", 2111 [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; 2112def ISSPACEP_SHARED_32 2113 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), 2114 "isspacep.shared \t$d, $a;", 2115 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; 2116def ISSPACEP_SHARED_64 2117 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 2118 "isspacep.shared \t$d, $a;", 2119 [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; 2120 2121 2122// Special register reads 2123def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), 2124 (ins SpecialRegs:$r), 2125 "mov.b32 \t$d, $r;", []>; 2126 2127def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; 2128def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; 2129def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; 2130def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; 2131def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; 2132def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; 2133def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; 2134def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; 2135def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; 2136def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; 2137def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; 2138def 
: Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; 2139def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; 2140def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; 2141def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; 2142def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; 2143def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; 2144def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; 2145def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; 2146def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; 2147def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; 2148def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; 2149def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; 2150def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; 2151def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; 2152def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; 2153def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; 2154def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; 2155def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; 2156def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; 2157def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; 2158def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; 2159 2160 2161// rotate builtin support 2162 2163def ROTATE_B32_HW_IMM 2164 : NVPTXInst<(outs Int32Regs:$dst), 2165 (ins Int32Regs:$src, i32imm:$amt), 2166 "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", 2167 [(set Int32Regs:$dst, 2168 (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, 2169 Requires<[hasHWROT32]> ; 2170 2171def ROTATE_B32_HW_REG 2172 : NVPTXInst<(outs Int32Regs:$dst), 2173 (ins Int32Regs:$src, Int32Regs:$amt), 2174 "shf.l.wrap.b32 
\t$dst, $src, $src, $amt;", 2175 [(set Int32Regs:$dst, 2176 (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, 2177 Requires<[hasHWROT32]> ; 2178 2179def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), 2180 (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, 2181 Requires<[noHWROT32]> ; 2182 2183def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), 2184 (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, 2185 Requires<[noHWROT32]> ; 2186 2187let hasSideEffects = 0 in { 2188 def GET_LO_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2189 !strconcat("{{\n\t", 2190 ".reg .b32 %dummy;\n\t", 2191 "mov.b64 \t{$dst,%dummy}, $src;\n\t", 2192 "}}"), 2193 []> ; 2194 2195 def GET_HI_INT64 : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), 2196 !strconcat("{{\n\t", 2197 ".reg .b32 %dummy;\n\t", 2198 "mov.b64 \t{%dummy,$dst}, $src;\n\t", 2199 "}}"), 2200 []> ; 2201} 2202 2203let hasSideEffects = 0 in { 2204 def PACK_TWO_INT32 2205 : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), 2206 "mov.b64 \t$dst, {{$lo, $hi}};", []> ; 2207} 2208 2209def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), 2210 (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), 2211 (GET_LO_INT64 Int64Regs:$src))> ; 2212 2213// Funnel shift, requires >= sm_32. Does not trap if amt is out of range, so 2214// no side effects. 
let hasSideEffects = 0 in {
  // 32-bit funnel shifts in wrap mode, with an immediate or a register shift
  // amount. None of them carries a selection pattern of its own ([]); they
  // are instantiated explicitly by the 64-bit rotate patterns below.
  def SHF_L_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_L_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_IMM
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;

  def SHF_R_WRAP_B32_REG
    : NVPTXInst<(outs Int32Regs:$dst),
                (ins  Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
                "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
      Requires<[hasHWROT32]>;
}

// HW version of rotate 64
//
// The 64-bit source is split into its halves with GET_LO_INT64/GET_HI_INT64,
// each half of the result is produced by one funnel shift over the two input
// halves, and PACK_TWO_INT32 (operands: $lo, $hi) reassembles the result.

// Rotate left, immediate amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt),
            (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate left, register amount.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;


// Rotate right, immediate amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), imm:$amt),
            (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
      Requires<[hasHWROT32]>;

// Rotate right, register amount.
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (PACK_TWO_INT32
            (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
                                (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
            (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
                                (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
      Requires<[hasHWROT32]>;

// SW version of rotate 64
//
// Selected when the 32-bit funnel shift is unavailable (noHWROT32).
// For an immediate amount, ROT64imm_sw is handed both shift amounts: $amt and
// its complement. The value being rotated is 64 bits wide, so the complement
// must come from SUB_FRM_64 in both directions; SUB_FRM_32 is only
// appropriate for the 32-bit rotate (ROT32imm_sw).
// NOTE(review): ROT64imm_sw, ROTL64reg_sw, ROTR64reg_sw and SUB_FRM_64 are
// defined outside this section -- confirm the operand order (left-shift amount
// first, right-shift amount second) against their definitions.
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
          (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
      Requires<[noHWROT32]>;
def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
          (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
      Requires<[noHWROT32]>;


//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
//       also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = 1, IsTexModeUnified = 0 in {
// Texture fetch instructions using handles
def TEX_1D_F32_S32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
              "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
              "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              []>;
def TEX_1D_F32_F32_LEVEL
  : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
                    Float32Regs:$b, Float32Regs:$a),
              (ins Int64Regs:$t, Int64Regs:$s,
Float32Regs:$x, Float32Regs:$lod), 2316 "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2317 "[$t, $s, \\{$x\\}], $lod;", 2318 []>; 2319def TEX_1D_F32_F32_GRAD 2320 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2321 Float32Regs:$b, Float32Regs:$a), 2322 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2323 Float32Regs:$gradx, Float32Regs:$grady), 2324 "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2325 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2326 []>; 2327def TEX_1D_S32_S32 2328 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2329 Int32Regs:$b, Int32Regs:$a), 2330 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2331 "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2332 []>; 2333def TEX_1D_S32_F32 2334 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2335 Int32Regs:$b, Int32Regs:$a), 2336 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2337 "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2338 []>; 2339def TEX_1D_S32_F32_LEVEL 2340 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2341 Int32Regs:$b, Int32Regs:$a), 2342 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2343 Float32Regs:$lod), 2344 "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2345 "[$t, $s, \\{$x\\}], $lod;", 2346 []>; 2347def TEX_1D_S32_F32_GRAD 2348 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2349 Int32Regs:$b, Int32Regs:$a), 2350 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2351 Float32Regs:$gradx, Float32Regs:$grady), 2352 "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2353 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2354 []>; 2355def TEX_1D_U32_S32 2356 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2357 Int32Regs:$b, Int32Regs:$a), 2358 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), 2359 "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2360 []>; 2361def TEX_1D_U32_F32 2362 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2363 Int32Regs:$b, Int32Regs:$a), 2364 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), 2365 
"tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", 2366 []>; 2367def TEX_1D_U32_F32_LEVEL 2368 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2369 Int32Regs:$b, Int32Regs:$a), 2370 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2371 Float32Regs:$lod), 2372 "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2373 "[$t, $s, \\{$x\\}], $lod;", 2374 []>; 2375def TEX_1D_U32_F32_GRAD 2376 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2377 Int32Regs:$b, Int32Regs:$a), 2378 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 2379 Float32Regs:$gradx, Float32Regs:$grady), 2380 "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2381 "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 2382 []>; 2383 2384def TEX_1D_ARRAY_F32_S32 2385 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2386 Float32Regs:$b, Float32Regs:$a), 2387 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2388 "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2389 "[$t, $s, \\{$l, $x\\}];", 2390 []>; 2391def TEX_1D_ARRAY_F32_F32 2392 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2393 Float32Regs:$b, Float32Regs:$a), 2394 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2395 "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2396 "[$t, $s, \\{$l, $x\\}];", 2397 []>; 2398def TEX_1D_ARRAY_F32_F32_LEVEL 2399 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2400 Float32Regs:$b, Float32Regs:$a), 2401 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2402 Float32Regs:$lod), 2403 "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2404 "[$t, $s, \\{$l, $x\\}], $lod;", 2405 []>; 2406def TEX_1D_ARRAY_F32_F32_GRAD 2407 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2408 Float32Regs:$b, Float32Regs:$a), 2409 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2410 Float32Regs:$gradx, Float32Regs:$grady), 2411 "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2412 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2413 []>; 2414def TEX_1D_ARRAY_S32_S32 2415 : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2416 Int32Regs:$b, Int32Regs:$a), 2417 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2418 "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2419 "[$t, $s, \\{$l, $x\\}];", 2420 []>; 2421def TEX_1D_ARRAY_S32_F32 2422 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2423 Int32Regs:$b, Int32Regs:$a), 2424 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2425 "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2426 "[$t, $s, \\{$l, $x\\}];", 2427 []>; 2428def TEX_1D_ARRAY_S32_F32_LEVEL 2429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2430 Int32Regs:$b, Int32Regs:$a), 2431 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2432 Float32Regs:$lod), 2433 "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2434 "[$t, $s, \\{$l, $x\\}], $lod;", 2435 []>; 2436def TEX_1D_ARRAY_S32_F32_GRAD 2437 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2438 Int32Regs:$b, Int32Regs:$a), 2439 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2440 Float32Regs:$gradx, Float32Regs:$grady), 2441 "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2442 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2443 []>; 2444def TEX_1D_ARRAY_U32_S32 2445 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2446 Int32Regs:$b, Int32Regs:$a), 2447 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 2448 "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2449 "[$t, $s, \\{$l, $x\\}];", 2450 []>; 2451def TEX_1D_ARRAY_U32_F32 2452 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2453 Int32Regs:$b, Int32Regs:$a), 2454 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), 2455 "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2456 "[$t, $s, \\{$l, $x\\}];", 2457 []>; 2458def TEX_1D_ARRAY_U32_F32_LEVEL 2459 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2460 Int32Regs:$b, Int32Regs:$a), 2461 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2462 Float32Regs:$lod), 2463 "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, 
$a\\}, " 2464 "[$t, $s, \\{$l, $x\\}], $lod;", 2465 []>; 2466def TEX_1D_ARRAY_U32_F32_GRAD 2467 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2468 Int32Regs:$b, Int32Regs:$a), 2469 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2470 Float32Regs:$gradx, Float32Regs:$grady), 2471 "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2472 "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 2473 []>; 2474 2475def TEX_2D_F32_S32 2476 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2477 Float32Regs:$b, Float32Regs:$a), 2478 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2479 "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2480 "[$t, $s, \\{$x, $y\\}];", 2481 []>; 2482def TEX_2D_F32_F32 2483 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2484 Float32Regs:$b, Float32Regs:$a), 2485 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2486 "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2487 "[$t, $s, \\{$x, $y\\}];", 2488 []>; 2489def TEX_2D_F32_F32_LEVEL 2490 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2491 Float32Regs:$b, Float32Regs:$a), 2492 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2493 Float32Regs:$lod), 2494 "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2495 "[$t, $s, \\{$x, $y\\}], $lod;", 2496 []>; 2497def TEX_2D_F32_F32_GRAD 2498 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2499 Float32Regs:$b, Float32Regs:$a), 2500 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2501 Float32Regs:$gradx0, Float32Regs:$gradx1, 2502 Float32Regs:$grady0, Float32Regs:$grady1), 2503 "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2504 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2505 "\\{$grady0, $grady1\\};", 2506 []>; 2507def TEX_2D_S32_S32 2508 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2509 Int32Regs:$b, Int32Regs:$a), 2510 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2511 "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2512 "[$t, $s, \\{$x, $y\\}];", 2513 
[]>; 2514def TEX_2D_S32_F32 2515 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2516 Int32Regs:$b, Int32Regs:$a), 2517 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2518 "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2519 "[$t, $s, \\{$x, $y\\}];", 2520 []>; 2521def TEX_2D_S32_F32_LEVEL 2522 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2523 Int32Regs:$b, Int32Regs:$a), 2524 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2525 Float32Regs:$lod), 2526 "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2527 "[$t, $s, \\{$x, $y\\}], $lod;", 2528 []>; 2529def TEX_2D_S32_F32_GRAD 2530 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2531 Int32Regs:$b, Int32Regs:$a), 2532 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2533 Float32Regs:$gradx0, Float32Regs:$gradx1, 2534 Float32Regs:$grady0, Float32Regs:$grady1), 2535 "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2536 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2537 "\\{$grady0, $grady1\\};", 2538 []>; 2539def TEX_2D_U32_S32 2540 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2541 Int32Regs:$b, Int32Regs:$a), 2542 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 2543 "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2544 "[$t, $s, \\{$x, $y\\}];", 2545 []>; 2546def TEX_2D_U32_F32 2547 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2548 Int32Regs:$b, Int32Regs:$a), 2549 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), 2550 "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2551 "[$t, $s, \\{$x, $y\\}];", 2552 []>; 2553def TEX_2D_U32_F32_LEVEL 2554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2555 Int32Regs:$b, Int32Regs:$a), 2556 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2557 Float32Regs:$lod), 2558 "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2559 "[$t, $s, \\{$x, $y\\}], $lod;", 2560 []>; 2561def TEX_2D_U32_F32_GRAD 2562 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2563 Int32Regs:$b, Int32Regs:$a), 2564 (ins 
Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2565 Float32Regs:$gradx0, Float32Regs:$gradx1, 2566 Float32Regs:$grady0, Float32Regs:$grady1), 2567 "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2568 "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " 2569 "\\{$grady0, $grady1\\};", 2570 []>; 2571 2572def TEX_2D_ARRAY_F32_S32 2573 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2574 Float32Regs:$b, Float32Regs:$a), 2575 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2576 Int32Regs:$y), 2577 "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2578 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2579 []>; 2580def TEX_2D_ARRAY_F32_F32 2581 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2582 Float32Regs:$b, Float32Regs:$a), 2583 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2584 Float32Regs:$y), 2585 "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2586 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2587 []>; 2588def TEX_2D_ARRAY_F32_F32_LEVEL 2589 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2590 Float32Regs:$b, Float32Regs:$a), 2591 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2592 Float32Regs:$y, Float32Regs:$lod), 2593 "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2594 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2595 []>; 2596def TEX_2D_ARRAY_F32_F32_GRAD 2597 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2598 Float32Regs:$b, Float32Regs:$a), 2599 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2600 Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, 2601 Float32Regs:$grady0, Float32Regs:$grady1), 2602 "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2603 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2604 "\\{$grady0, $grady1\\};", 2605 []>; 2606def TEX_2D_ARRAY_S32_S32 2607 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2608 Int32Regs:$b, Int32Regs:$a), 2609 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2610 Int32Regs:$y), 2611 "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, 
$a\\}, " 2612 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2613 []>; 2614def TEX_2D_ARRAY_S32_F32 2615 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2616 Int32Regs:$b, Int32Regs:$a), 2617 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2618 Float32Regs:$y), 2619 "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2620 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2621 []>; 2622def TEX_2D_ARRAY_S32_F32_LEVEL 2623 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2624 Int32Regs:$b, Int32Regs:$a), 2625 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2626 Float32Regs:$y, Float32Regs:$lod), 2627 "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2628 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2629 []>; 2630def TEX_2D_ARRAY_S32_F32_GRAD 2631 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2632 Int32Regs:$b, Int32Regs:$a), 2633 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2634 Float32Regs:$y, 2635 Float32Regs:$gradx0, Float32Regs:$gradx1, 2636 Float32Regs:$grady0, Float32Regs:$grady1), 2637 "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2638 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2639 "\\{$grady0, $grady1\\};", 2640 []>; 2641def TEX_2D_ARRAY_U32_S32 2642 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2643 Int32Regs:$b, Int32Regs:$a), 2644 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 2645 Int32Regs:$y), 2646 "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2647 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2648 []>; 2649def TEX_2D_ARRAY_U32_F32 2650 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2651 Int32Regs:$b, Int32Regs:$a), 2652 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2653 Float32Regs:$y), 2654 "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2655 "[$t, $s, \\{$l, $x, $y, $y\\}];", 2656 []>; 2657def TEX_2D_ARRAY_U32_F32_LEVEL 2658 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2659 Int32Regs:$b, Int32Regs:$a), 2660 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2661 Float32Regs:$y, 
Float32Regs:$lod), 2662 "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2663 "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", 2664 []>; 2665def TEX_2D_ARRAY_U32_F32_GRAD 2666 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2667 Int32Regs:$b, Int32Regs:$a), 2668 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, 2669 Float32Regs:$y, 2670 Float32Regs:$gradx0, Float32Regs:$gradx1, 2671 Float32Regs:$grady0, Float32Regs:$grady1), 2672 "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2673 "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " 2674 "\\{$grady0, $grady1\\};", 2675 []>; 2676 2677def TEX_3D_F32_S32 2678 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2679 Float32Regs:$b, Float32Regs:$a), 2680 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2681 Int32Regs:$z), 2682 "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, " 2683 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2684 []>; 2685def TEX_3D_F32_F32 2686 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2687 Float32Regs:$b, Float32Regs:$a), 2688 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2689 Float32Regs:$z), 2690 "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2691 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2692 []>; 2693def TEX_3D_F32_F32_LEVEL 2694 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2695 Float32Regs:$b, Float32Regs:$a), 2696 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2697 Float32Regs:$z, Float32Regs:$lod), 2698 "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2699 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2700 []>; 2701def TEX_3D_F32_F32_GRAD 2702 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2703 Float32Regs:$b, Float32Regs:$a), 2704 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2705 Float32Regs:$z, 2706 Float32Regs:$gradx0, Float32Regs:$gradx1, 2707 Float32Regs:$gradx2, Float32Regs:$grady0, 2708 Float32Regs:$grady1, Float32Regs:$grady2), 2709 "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2710 "[$t, $s, \\{$x, $y, $z, 
$z\\}], " 2711 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2712 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2713 []>; 2714def TEX_3D_S32_S32 2715 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2716 Int32Regs:$b, Int32Regs:$a), 2717 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2718 Int32Regs:$z), 2719 "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, " 2720 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2721 []>; 2722def TEX_3D_S32_F32 2723 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2724 Int32Regs:$b, Int32Regs:$a), 2725 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2726 Float32Regs:$z), 2727 "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2728 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2729 []>; 2730def TEX_3D_S32_F32_LEVEL 2731 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2732 Int32Regs:$b, Int32Regs:$a), 2733 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2734 Float32Regs:$z, Float32Regs:$lod), 2735 "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2736 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2737 []>; 2738def TEX_3D_S32_F32_GRAD 2739 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2740 Int32Regs:$b, Int32Regs:$a), 2741 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2742 Float32Regs:$z, 2743 Float32Regs:$gradx0, Float32Regs:$gradx1, 2744 Float32Regs:$gradx2, Float32Regs:$grady0, 2745 Float32Regs:$grady1, Float32Regs:$grady2), 2746 "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2747 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2748 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2749 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2750 []>; 2751def TEX_3D_U32_S32 2752 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2753 Int32Regs:$b, Int32Regs:$a), 2754 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 2755 Int32Regs:$z), 2756 "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, " 2757 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2758 []>; 2759def TEX_3D_U32_F32 2760 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2761 Int32Regs:$b, 
Int32Regs:$a), 2762 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2763 Float32Regs:$z), 2764 "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2765 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2766 []>; 2767def TEX_3D_U32_F32_LEVEL 2768 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2769 Int32Regs:$b, Int32Regs:$a), 2770 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2771 Float32Regs:$z, Float32Regs:$lod), 2772 "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2773 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2774 []>; 2775def TEX_3D_U32_F32_GRAD 2776 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2777 Int32Regs:$b, Int32Regs:$a), 2778 (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, 2779 Float32Regs:$z, 2780 Float32Regs:$gradx0, Float32Regs:$gradx1, 2781 Float32Regs:$gradx2, Float32Regs:$grady0, 2782 Float32Regs:$grady1, Float32Regs:$grady2), 2783 "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2784 "[$t, $s, \\{$x, $y, $z, $z\\}], " 2785 "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " 2786 "\\{$grady0, $grady1, $grady2, $grady2\\};", 2787 []>; 2788 2789def TEX_CUBE_F32_F32 2790 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2791 Float32Regs:$b, Float32Regs:$a), 2792 (ins Int64Regs:$t, Int64Regs:$s, 2793 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2794 "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2795 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2796 []>; 2797def TEX_CUBE_F32_F32_LEVEL 2798 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2799 Float32Regs:$b, Float32Regs:$a), 2800 (ins Int64Regs:$t, Int64Regs:$s, 2801 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2802 Float32Regs:$lod), 2803 "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2804 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2805 []>; 2806def TEX_CUBE_S32_F32 2807 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2808 Int32Regs:$b, Int32Regs:$a), 2809 (ins Int64Regs:$t, Int64Regs:$s, 2810 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2811 "tex.cube.v4.s32.f32 
\t\\{$r, $g, $b, $a\\}, " 2812 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2813 []>; 2814def TEX_CUBE_S32_F32_LEVEL 2815 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2816 Int32Regs:$b, Int32Regs:$a), 2817 (ins Int64Regs:$t, Int64Regs:$s, 2818 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2819 Float32Regs:$lod), 2820 "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, " 2821 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2822 []>; 2823def TEX_CUBE_U32_F32 2824 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2825 Int32Regs:$b, Int32Regs:$a), 2826 (ins Int64Regs:$t, Int64Regs:$s, 2827 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2828 "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2829 "[$t, $s, \\{$x, $y, $z, $z\\}];", 2830 []>; 2831def TEX_CUBE_U32_F32_LEVEL 2832 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2833 Int32Regs:$b, Int32Regs:$a), 2834 (ins Int64Regs:$t, Int64Regs:$s, 2835 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2836 Float32Regs:$lod), 2837 "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, " 2838 "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", 2839 []>; 2840 2841def TEX_CUBE_ARRAY_F32_F32 2842 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2843 Float32Regs:$b, Float32Regs:$a), 2844 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2845 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z), 2846 "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2847 "[$t, $s, \\{$l, $x, $y, $z\\}];", 2848 []>; 2849def TEX_CUBE_ARRAY_F32_F32_LEVEL 2850 : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, 2851 Float32Regs:$b, Float32Regs:$a), 2852 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2853 Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, 2854 Float32Regs:$lod), 2855 "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, " 2856 "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;", 2857 []>; 2858def TEX_CUBE_ARRAY_S32_F32 2859 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, 2860 Int32Regs:$b, Int32Regs:$a), 2861 (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, 2862 Float32Regs:$x, Float32Regs:$y, 
Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
// The lines above complete a definition whose header precedes this point.

// Cube-array fetches taking separate $t and $s 64-bit operands.
def TEX_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z, Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// tld4 on 2D textures with separate $t and $s operands: one def per
// component selector (r/g/b/a) and result type (f32/s32/u32).
def TLD4_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
def TLD4_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, $s, \\{$x, $y\\}];",
    []>;
}


// Unified texture mode (texmode_unified).
// Every def in this section is tagged IsTex = 1 and IsTexModeUnified = 1 and
// takes a single 64-bit $t operand; there is no separate $s operand.
let IsTex = 1, IsTexModeUnified = 1 in {
// Texture fetches through handles.
// Def names follow TEX_UNIFIED_<geometry>_<result type>_<coord type>, with an
// optional _LEVEL (extra $lod operand) or _GRAD (extra gradient operands).

// --- 1D ---
def TEX_UNIFIED_1D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
    "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def TEX_UNIFIED_1D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def TEX_UNIFIED_1D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x),
    "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x),
    "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
    []>;
def TEX_UNIFIED_1D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// --- 1D array: the 32-bit $l operand leads the coordinate vector ---
def TEX_UNIFIED_1D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;
def TEX_UNIFIED_1D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
    "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
    "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}];",
    []>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$lod),
    "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], $lod;",
    []>;
def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$gradx, Float32Regs:$grady),
    "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
    []>;

// --- 2D ---
def TEX_UNIFIED_2D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
def TEX_UNIFIED_2D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
def TEX_UNIFIED_2D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
    "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TEX_UNIFIED_2D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$lod),
    "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// --- 2D array ---
// The coordinate vector is written with four entries; $y is repeated in the
// last slot. NOTE(review): presumably padding required by the PTX operand
// form -- confirm against the PTX ISA before changing.
def TEX_UNIFIED_2D_ARRAY_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
         Int32Regs:$y),
    "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y),
    "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}];",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y, Float32Regs:$lod),
    "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], $lod;",
    []>;
def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
         Float32Regs:$y,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$grady0, Float32Regs:$grady1),
    "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
    "\\{$grady0, $grady1\\};",
    []>;

// --- 3D ---
// Coordinate and gradient vectors are written with four entries; the third
// operand ($z, $gradx2, $grady2) is repeated in the last slot.
def TEX_UNIFIED_3D_F32_S32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_F32_F32_GRAD : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
def TEX_UNIFIED_3D_S32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_S32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;
def TEX_UNIFIED_3D_U32_S32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
         Int32Regs:$z),
    "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z),
    "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_3D_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z, Float32Regs:$lod),
    "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_3D_U32_F32_GRAD : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
         Float32Regs:$z,
         Float32Regs:$gradx0, Float32Regs:$gradx1,
         Float32Regs:$gradx2, Float32Regs:$grady0,
         Float32Regs:$grady1, Float32Regs:$grady2),
    "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], "
    "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
    "\\{$grady0, $grady1, $grady2, $grady2\\};",
    []>;

// --- Cube: float coordinates only; plain and _LEVEL forms ---
def TEX_UNIFIED_CUBE_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$x, $y, $z, $z\\}], $lod;",
    []>;

// --- Cube array: $l plus three float coordinates fill all four slots ---
def TEX_UNIFIED_CUBE_ARRAY_F32_F32 : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL : NVPTXInst<
    (outs Float32Regs:$r, Float32Regs:$g, Float32Regs:$b, Float32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32 : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
    "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}];",
    []>;
def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$t, Int32Regs:$l,
         Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
         Float32Regs:$lod),
    "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
    "[$t, \\{$l, $x, $y, $z\\}], $lod;",
    []>;

// --- tld4 on 2D textures, unified form: one def per component selector
// (r/g/b/a) and result type (f32/s32/u32) ---
def TLD4_UNIFIED_R_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_F32_F32 : NVPTXInst<
    (outs Float32Regs:$v0, Float32Regs:$v1, Float32Regs:$v2, Float32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_R_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_S32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_R_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_G_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_B_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
def TLD4_UNIFIED_A_2D_U32_F32 : NVPTXInst<
    (outs Int32Regs:$v0, Int32Regs:$v1, Int32Regs:$v2, Int32Regs:$v3),
    (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
    "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
    "[$t, \\{$x, $y\\}];",
    []>;
}



//=== Surface load instructions
// .clamp variant
//
// Def names follow SULD_<geometry>_<element>_CLAMP. $s is the 64-bit surface
// operand and all coordinates are 32-bit integers. The b8 loads write their
// result into Int16Regs, the same class the b16 loads use.

// Single-result loads (IsSuld = 1).
let IsSuld = 1 in {
def SULD_1D_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_ARRAY_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_2D_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_ARRAY_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_3D_I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
}

// Two-result (.v2) loads (IsSuld = 2).
let IsSuld = 2 in {
def SULD_1D_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_ARRAY_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
    []>;

def SULD_2D_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_ARRAY_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;
def SULD_2D_ARRAY_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
    "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
    "[$s, \\{$l, $x, $y, $y\\}];",
    []>;

def SULD_3D_V2I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
def SULD_3D_V2I64_CLAMP : NVPTXInst<
    (outs Int64Regs:$r, Int64Regs:$g),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
    "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
    []>;
}

// Four-result (.v4) loads (IsSuld = 3).
let IsSuld = 3 in {
def SULD_1D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;
def SULD_1D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x),
    "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
    []>;

def SULD_1D_ARRAY_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
    []>;
def SULD_1D_ARRAY_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
    "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
    "[$s, \\{$l, $x\\}];",
    []>;

def SULD_2D_V4I8_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I16_CLAMP : NVPTXInst<
    (outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;
def SULD_2D_V4I32_CLAMP : NVPTXInst<
    (outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
    (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
    "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
    []>;

def SULD_2D_ARRAY_V4I8_CLAMP
  : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b,
Int16Regs:$a), 3938 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3939 "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3940 "[$s, \\{$l, $x, $y, $y\\}];", 3941 []>; 3942def SULD_2D_ARRAY_V4I16_CLAMP 3943 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3944 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3945 "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3946 "[$s, \\{$l, $x, $y, $y\\}];", 3947 []>; 3948def SULD_2D_ARRAY_V4I32_CLAMP 3949 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3950 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 3951 "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3952 "[$s, \\{$l, $x, $y, $y\\}];", 3953 []>; 3954 3955 3956def SULD_3D_V4I8_CLAMP 3957 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3958 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3959 "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, " 3960 "[$s, \\{$x, $y, $z, $z\\}];", 3961 []>; 3962def SULD_3D_V4I16_CLAMP 3963 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 3964 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3965 "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, " 3966 "[$s, \\{$x, $y, $z, $z\\}];", 3967 []>; 3968def SULD_3D_V4I32_CLAMP 3969 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 3970 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 3971 "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, " 3972 "[$s, \\{$x, $y, $z, $z\\}];", 3973 []>; 3974} 3975 3976 3977// .trap variant 3978let IsSuld = 1 in { 3979def SULD_1D_I8_TRAP 3980 : NVPTXInst<(outs Int16Regs:$r), 3981 (ins Int64Regs:$s, Int32Regs:$x), 3982 "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", 3983 []>; 3984def SULD_1D_I16_TRAP 3985 : NVPTXInst<(outs Int16Regs:$r), 3986 (ins Int64Regs:$s, Int32Regs:$x), 3987 "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", 3988 []>; 3989def SULD_1D_I32_TRAP 3990 : 
NVPTXInst<(outs Int32Regs:$r), 3991 (ins Int64Regs:$s, Int32Regs:$x), 3992 "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", 3993 []>; 3994def SULD_1D_I64_TRAP 3995 : NVPTXInst<(outs Int64Regs:$r), 3996 (ins Int64Regs:$s, Int32Regs:$x), 3997 "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];", 3998 []>; 3999 4000def SULD_1D_ARRAY_I8_TRAP 4001 : NVPTXInst<(outs Int16Regs:$r), 4002 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4003 "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4004 []>; 4005def SULD_1D_ARRAY_I16_TRAP 4006 : NVPTXInst<(outs Int16Regs:$r), 4007 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4008 "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4009 []>; 4010def SULD_1D_ARRAY_I32_TRAP 4011 : NVPTXInst<(outs Int32Regs:$r), 4012 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4013 "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4014 []>; 4015def SULD_1D_ARRAY_I64_TRAP 4016 : NVPTXInst<(outs Int64Regs:$r), 4017 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4018 "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];", 4019 []>; 4020 4021def SULD_2D_I8_TRAP 4022 : NVPTXInst<(outs Int16Regs:$r), 4023 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4024 "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4025 []>; 4026def SULD_2D_I16_TRAP 4027 : NVPTXInst<(outs Int16Regs:$r), 4028 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4029 "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4030 []>; 4031def SULD_2D_I32_TRAP 4032 : NVPTXInst<(outs Int32Regs:$r), 4033 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4034 "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4035 []>; 4036def SULD_2D_I64_TRAP 4037 : NVPTXInst<(outs Int64Regs:$r), 4038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4039 "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];", 4040 []>; 4041 4042def SULD_2D_ARRAY_I8_TRAP 4043 : NVPTXInst<(outs Int16Regs:$r), 4044 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4045 "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, 
$y\\}];", 4046 []>; 4047def SULD_2D_ARRAY_I16_TRAP 4048 : NVPTXInst<(outs Int16Regs:$r), 4049 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4050 "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4051 []>; 4052def SULD_2D_ARRAY_I32_TRAP 4053 : NVPTXInst<(outs Int32Regs:$r), 4054 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4055 "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4056 []>; 4057def SULD_2D_ARRAY_I64_TRAP 4058 : NVPTXInst<(outs Int64Regs:$r), 4059 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4060 "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4061 []>; 4062 4063def SULD_3D_I8_TRAP 4064 : NVPTXInst<(outs Int16Regs:$r), 4065 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4066 "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4067 []>; 4068def SULD_3D_I16_TRAP 4069 : NVPTXInst<(outs Int16Regs:$r), 4070 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4071 "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4072 []>; 4073def SULD_3D_I32_TRAP 4074 : NVPTXInst<(outs Int32Regs:$r), 4075 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4076 "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4077 []>; 4078def SULD_3D_I64_TRAP 4079 : NVPTXInst<(outs Int64Regs:$r), 4080 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4081 "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4082 []>; 4083} 4084 4085let IsSuld = 2 in { 4086def SULD_1D_V2I8_TRAP 4087 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4088 (ins Int64Regs:$s, Int32Regs:$x), 4089 "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4090 []>; 4091def SULD_1D_V2I16_TRAP 4092 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4093 (ins Int64Regs:$s, Int32Regs:$x), 4094 "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4095 []>; 4096def SULD_1D_V2I32_TRAP 4097 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4098 (ins Int64Regs:$s, 
Int32Regs:$x), 4099 "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4100 []>; 4101def SULD_1D_V2I64_TRAP 4102 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4103 (ins Int64Regs:$s, Int32Regs:$x), 4104 "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];", 4105 []>; 4106 4107def SULD_1D_ARRAY_V2I8_TRAP 4108 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4109 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4110 "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4111 []>; 4112def SULD_1D_ARRAY_V2I16_TRAP 4113 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4114 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4115 "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4116 []>; 4117def SULD_1D_ARRAY_V2I32_TRAP 4118 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4119 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4120 "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4121 []>; 4122def SULD_1D_ARRAY_V2I64_TRAP 4123 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4124 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4125 "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4126 []>; 4127 4128def SULD_2D_V2I8_TRAP 4129 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4130 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4131 "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4132 []>; 4133def SULD_2D_V2I16_TRAP 4134 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4135 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4136 "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4137 []>; 4138def SULD_2D_V2I32_TRAP 4139 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4140 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4141 "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4142 []>; 4143def SULD_2D_V2I64_TRAP 4144 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4145 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4146 "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4147 []>; 4148 4149def SULD_2D_ARRAY_V2I8_TRAP 4150 : 
NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4151 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4152 "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " 4153 "[$s, \\{$l, $x, $y, $y\\}];", 4154 []>; 4155def SULD_2D_ARRAY_V2I16_TRAP 4156 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4157 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4158 "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " 4159 "[$s, \\{$l, $x, $y, $y\\}];", 4160 []>; 4161def SULD_2D_ARRAY_V2I32_TRAP 4162 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4163 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4164 "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " 4165 "[$s, \\{$l, $x, $y, $y\\}];", 4166 []>; 4167def SULD_2D_ARRAY_V2I64_TRAP 4168 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4169 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4170 "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, " 4171 "[$s, \\{$l, $x, $y, $y\\}];", 4172 []>; 4173 4174def SULD_3D_V2I8_TRAP 4175 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4176 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4177 "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4178 []>; 4179def SULD_3D_V2I16_TRAP 4180 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4181 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4182 "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4183 []>; 4184def SULD_3D_V2I32_TRAP 4185 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4186 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4187 "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4188 []>; 4189def SULD_3D_V2I64_TRAP 4190 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4191 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4192 "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4193 []>; 4194} 4195 4196let IsSuld = 3 in { 4197def SULD_1D_V4I8_TRAP 4198 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4199 (ins 
Int64Regs:$s, Int32Regs:$x), 4200 "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4201 []>; 4202def SULD_1D_V4I16_TRAP 4203 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4204 (ins Int64Regs:$s, Int32Regs:$x), 4205 "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4206 []>; 4207def SULD_1D_V4I32_TRAP 4208 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4209 (ins Int64Regs:$s, Int32Regs:$x), 4210 "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4211 []>; 4212 4213def SULD_1D_ARRAY_V4I8_TRAP 4214 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4215 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4216 "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4217 "[$s, \\{$l, $x\\}];", 4218 []>; 4219def SULD_1D_ARRAY_V4I16_TRAP 4220 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4221 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4222 "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4223 "[$s, \\{$l, $x\\}];", 4224 []>; 4225def SULD_1D_ARRAY_V4I32_TRAP 4226 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4227 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4228 "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4229 "[$s, \\{$l, $x\\}];", 4230 []>; 4231 4232def SULD_2D_V4I8_TRAP 4233 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4235 "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4236 []>; 4237def SULD_2D_V4I16_TRAP 4238 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4239 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4240 "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4241 []>; 4242def SULD_2D_V4I32_TRAP 4243 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4244 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4245 
"suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4246 []>; 4247 4248def SULD_2D_ARRAY_V4I8_TRAP 4249 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4250 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4251 "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4252 "[$s, \\{$l, $x, $y, $y\\}];", 4253 []>; 4254def SULD_2D_ARRAY_V4I16_TRAP 4255 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4256 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4257 "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4258 "[$s, \\{$l, $x, $y, $y\\}];", 4259 []>; 4260def SULD_2D_ARRAY_V4I32_TRAP 4261 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4262 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4263 "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4264 "[$s, \\{$l, $x, $y, $y\\}];", 4265 []>; 4266 4267 4268def SULD_3D_V4I8_TRAP 4269 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4270 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4271 "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " 4272 "[$s, \\{$x, $y, $z, $z\\}];", 4273 []>; 4274def SULD_3D_V4I16_TRAP 4275 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4276 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4277 "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " 4278 "[$s, \\{$x, $y, $z, $z\\}];", 4279 []>; 4280def SULD_3D_V4I32_TRAP 4281 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4282 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4283 "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " 4284 "[$s, \\{$x, $y, $z, $z\\}];", 4285 []>; 4286} 4287 4288// .zero variant 4289let IsSuld = 1 in { 4290def SULD_1D_I8_ZERO 4291 : NVPTXInst<(outs Int16Regs:$r), 4292 (ins Int64Regs:$s, Int32Regs:$x), 4293 "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];", 4294 []>; 4295def SULD_1D_I16_ZERO 4296 : 
NVPTXInst<(outs Int16Regs:$r), 4297 (ins Int64Regs:$s, Int32Regs:$x), 4298 "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];", 4299 []>; 4300def SULD_1D_I32_ZERO 4301 : NVPTXInst<(outs Int32Regs:$r), 4302 (ins Int64Regs:$s, Int32Regs:$x), 4303 "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];", 4304 []>; 4305def SULD_1D_I64_ZERO 4306 : NVPTXInst<(outs Int64Regs:$r), 4307 (ins Int64Regs:$s, Int32Regs:$x), 4308 "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];", 4309 []>; 4310 4311def SULD_1D_ARRAY_I8_ZERO 4312 : NVPTXInst<(outs Int16Regs:$r), 4313 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4314 "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4315 []>; 4316def SULD_1D_ARRAY_I16_ZERO 4317 : NVPTXInst<(outs Int16Regs:$r), 4318 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4319 "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4320 []>; 4321def SULD_1D_ARRAY_I32_ZERO 4322 : NVPTXInst<(outs Int32Regs:$r), 4323 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4324 "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4325 []>; 4326def SULD_1D_ARRAY_I64_ZERO 4327 : NVPTXInst<(outs Int64Regs:$r), 4328 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4329 "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];", 4330 []>; 4331 4332def SULD_2D_I8_ZERO 4333 : NVPTXInst<(outs Int16Regs:$r), 4334 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4335 "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4336 []>; 4337def SULD_2D_I16_ZERO 4338 : NVPTXInst<(outs Int16Regs:$r), 4339 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4340 "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4341 []>; 4342def SULD_2D_I32_ZERO 4343 : NVPTXInst<(outs Int32Regs:$r), 4344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4345 "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4346 []>; 4347def SULD_2D_I64_ZERO 4348 : NVPTXInst<(outs Int64Regs:$r), 4349 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4350 "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];", 4351 []>; 4352 4353def SULD_2D_ARRAY_I8_ZERO 
4354 : NVPTXInst<(outs Int16Regs:$r), 4355 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4356 "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4357 []>; 4358def SULD_2D_ARRAY_I16_ZERO 4359 : NVPTXInst<(outs Int16Regs:$r), 4360 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4361 "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4362 []>; 4363def SULD_2D_ARRAY_I32_ZERO 4364 : NVPTXInst<(outs Int32Regs:$r), 4365 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4366 "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4367 []>; 4368def SULD_2D_ARRAY_I64_ZERO 4369 : NVPTXInst<(outs Int64Regs:$r), 4370 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4371 "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", 4372 []>; 4373 4374def SULD_3D_I8_ZERO 4375 : NVPTXInst<(outs Int16Regs:$r), 4376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4377 "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4378 []>; 4379def SULD_3D_I16_ZERO 4380 : NVPTXInst<(outs Int16Regs:$r), 4381 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4382 "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4383 []>; 4384def SULD_3D_I32_ZERO 4385 : NVPTXInst<(outs Int32Regs:$r), 4386 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4387 "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4388 []>; 4389def SULD_3D_I64_ZERO 4390 : NVPTXInst<(outs Int64Regs:$r), 4391 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4392 "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4393 []>; 4394} 4395 4396let IsSuld = 2 in { 4397def SULD_1D_V2I8_ZERO 4398 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4399 (ins Int64Regs:$s, Int32Regs:$x), 4400 "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4401 []>; 4402def SULD_1D_V2I16_ZERO 4403 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4404 (ins Int64Regs:$s, Int32Regs:$x), 4405 
"suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4406 []>; 4407def SULD_1D_V2I32_ZERO 4408 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4409 (ins Int64Regs:$s, Int32Regs:$x), 4410 "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4411 []>; 4412def SULD_1D_V2I64_ZERO 4413 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4414 (ins Int64Regs:$s, Int32Regs:$x), 4415 "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];", 4416 []>; 4417 4418def SULD_1D_ARRAY_V2I8_ZERO 4419 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4420 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4421 "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4422 []>; 4423def SULD_1D_ARRAY_V2I16_ZERO 4424 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4425 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4426 "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4427 []>; 4428def SULD_1D_ARRAY_V2I32_ZERO 4429 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4430 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4431 "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4432 []>; 4433def SULD_1D_ARRAY_V2I64_ZERO 4434 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4435 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4436 "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];", 4437 []>; 4438 4439def SULD_2D_V2I8_ZERO 4440 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4441 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4442 "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4443 []>; 4444def SULD_2D_V2I16_ZERO 4445 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4446 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4447 "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4448 []>; 4449def SULD_2D_V2I32_ZERO 4450 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4451 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4452 "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4453 []>; 4454def SULD_2D_V2I64_ZERO 4455 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 
4456 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4457 "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];", 4458 []>; 4459 4460def SULD_2D_ARRAY_V2I8_ZERO 4461 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4462 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4463 "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, " 4464 "[$s, \\{$l, $x, $y, $y\\}];", 4465 []>; 4466def SULD_2D_ARRAY_V2I16_ZERO 4467 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4468 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4469 "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, " 4470 "[$s, \\{$l, $x, $y, $y\\}];", 4471 []>; 4472def SULD_2D_ARRAY_V2I32_ZERO 4473 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4474 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4475 "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, " 4476 "[$s, \\{$l, $x, $y, $y\\}];", 4477 []>; 4478def SULD_2D_ARRAY_V2I64_ZERO 4479 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4480 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4481 "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, " 4482 "[$s, \\{$l, $x, $y, $y\\}];", 4483 []>; 4484 4485def SULD_3D_V2I8_ZERO 4486 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4487 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4488 "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4489 []>; 4490def SULD_3D_V2I16_ZERO 4491 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), 4492 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4493 "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4494 []>; 4495def SULD_3D_V2I32_ZERO 4496 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), 4497 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4498 "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", 4499 []>; 4500def SULD_3D_V2I64_ZERO 4501 : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g), 4502 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4503 "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, 
$z\\}];", 4504 []>; 4505} 4506 4507let IsSuld = 3 in { 4508def SULD_1D_V4I8_ZERO 4509 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4510 (ins Int64Regs:$s, Int32Regs:$x), 4511 "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4512 []>; 4513def SULD_1D_V4I16_ZERO 4514 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4515 (ins Int64Regs:$s, Int32Regs:$x), 4516 "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4517 []>; 4518def SULD_1D_V4I32_ZERO 4519 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4520 (ins Int64Regs:$s, Int32Regs:$x), 4521 "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", 4522 []>; 4523 4524def SULD_1D_ARRAY_V4I8_ZERO 4525 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4526 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4527 "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4528 "[$s, \\{$l, $x\\}];", 4529 []>; 4530def SULD_1D_ARRAY_V4I16_ZERO 4531 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4532 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4533 "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4534 "[$s, \\{$l, $x\\}];", 4535 []>; 4536def SULD_1D_ARRAY_V4I32_ZERO 4537 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4538 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), 4539 "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4540 "[$s, \\{$l, $x\\}];", 4541 []>; 4542 4543def SULD_2D_V4I8_ZERO 4544 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4545 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4546 "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4547 []>; 4548def SULD_2D_V4I16_ZERO 4549 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4550 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4551 "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4552 []>; 4553def 
SULD_2D_V4I32_ZERO 4554 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4555 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), 4556 "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", 4557 []>; 4558 4559def SULD_2D_ARRAY_V4I8_ZERO 4560 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4561 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4562 "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4563 "[$s, \\{$l, $x, $y, $y\\}];", 4564 []>; 4565def SULD_2D_ARRAY_V4I16_ZERO 4566 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4567 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4568 "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4569 "[$s, \\{$l, $x, $y, $y\\}];", 4570 []>; 4571def SULD_2D_ARRAY_V4I32_ZERO 4572 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4573 (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), 4574 "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4575 "[$s, \\{$l, $x, $y, $y\\}];", 4576 []>; 4577 4578 4579def SULD_3D_V4I8_ZERO 4580 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4581 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4582 "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, " 4583 "[$s, \\{$x, $y, $z, $z\\}];", 4584 []>; 4585def SULD_3D_V4I16_ZERO 4586 : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4587 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4588 "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, " 4589 "[$s, \\{$x, $y, $z, $z\\}];", 4590 []>; 4591def SULD_3D_V4I32_ZERO 4592 : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4593 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), 4594 "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, " 4595 "[$s, \\{$x, $y, $z, $z\\}];", 4596 []>; 4597} 4598 4599//----------------------------------- 4600// Texture Query Intrinsics 
//-----------------------------------

// A single texture query instruction.  Prints `txq.<query>.b32 \t$d, [$a];`
// with an Int32Regs result $d and an Int64Regs operand $a.  The pattern list
// is empty; selection is done by the TxqPat records further down.
class TxqInst<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              !strconcat("txq.", query, ".b32 \t$d, [$a];"),
              []>;

// Maps the intrinsic `intr`, applied to an Int64Regs operand, onto the query
// instruction `inst` taking that same operand.
class TxqPat<Intrinsic intr, NVPTXInst inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

let IsSurfTexQuery = 1 in {
def TXQ_CHANNEL_ORDER     : TxqInst<"channel_order">;
def TXQ_CHANNEL_DATA_TYPE : TxqInst<"channel_data_type">;
def TXQ_WIDTH             : TxqInst<"width">;
def TXQ_HEIGHT            : TxqInst<"height">;
def TXQ_DEPTH             : TxqInst<"depth">;
def TXQ_ARRAY_SIZE        : TxqInst<"array_size">;
def TXQ_NUM_SAMPLES       : TxqInst<"num_samples">;
def TXQ_NUM_MIPMAP_LEVELS : TxqInst<"num_mipmap_levels">;
}

def : TxqPat<int_nvvm_txq_channel_order,     TXQ_CHANNEL_ORDER>;
def : TxqPat<int_nvvm_txq_channel_data_type, TXQ_CHANNEL_DATA_TYPE>;
def : TxqPat<int_nvvm_txq_width,             TXQ_WIDTH>;
def : TxqPat<int_nvvm_txq_height,            TXQ_HEIGHT>;
def : TxqPat<int_nvvm_txq_depth,             TXQ_DEPTH>;
def : TxqPat<int_nvvm_txq_array_size,        TXQ_ARRAY_SIZE>;
def : TxqPat<int_nvvm_txq_num_samples,       TXQ_NUM_SAMPLES>;
def : TxqPat<int_nvvm_txq_num_mipmap_levels, TXQ_NUM_MIPMAP_LEVELS>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------
// A single surface query instruction.  Prints `suq.<query>.b32 \t$d, [$a];`
// with an Int32Regs result $d and an Int64Regs operand $a.  The pattern list
// is empty; selection is done by the SuqPat records further down.
class SuqInst<string query>
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              !strconcat("suq.", query, ".b32 \t$d, [$a];"),
              []>;

// Maps the intrinsic `intr`, applied to an Int64Regs operand, onto the query
// instruction `inst` taking that same operand.
class SuqPat<Intrinsic intr, NVPTXInst inst>
  : Pat<(intr Int64Regs:$a), (inst Int64Regs:$a)>;

let IsSurfTexQuery = 1 in {
def SUQ_CHANNEL_ORDER     : SuqInst<"channel_order">;
def SUQ_CHANNEL_DATA_TYPE : SuqInst<"channel_data_type">;
def SUQ_WIDTH             : SuqInst<"width">;
def SUQ_HEIGHT            : SuqInst<"height">;
def SUQ_DEPTH             : SuqInst<"depth">;
def SUQ_ARRAY_SIZE        : SuqInst<"array_size">;
}

def : SuqPat<int_nvvm_suq_channel_order,     SUQ_CHANNEL_ORDER>;
def : SuqPat<int_nvvm_suq_channel_data_type, SUQ_CHANNEL_DATA_TYPE>;
def : SuqPat<int_nvvm_suq_width,             SUQ_WIDTH>;
def : SuqPat<int_nvvm_suq_height,            SUQ_HEIGHT>;
def : SuqPat<int_nvvm_suq_depth,             SUQ_DEPTH>;
def : SuqPat<int_nvvm_suq_array_size,        SUQ_ARRAY_SIZE>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work

// Prints `istypep.<ref> \t$d, $a;` and carries its own selection pattern:
// the Int1Regs result $d is set from `intr` applied to the Int64Regs operand.
class IstypepInst<string ref, Intrinsic intr>
  : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
              !strconcat("istypep.", ref, " \t$d, $a;"),
              [(set Int1Regs:$d, (intr Int64Regs:$a))]>;

def ISTYPEP_SAMPLER : IstypepInst<"samplerref", int_nvvm_istypep_sampler>;
def ISTYPEP_SURFACE : IstypepInst<"surfref",    int_nvvm_istypep_surface>;
def ISTYPEP_TEXTURE : IstypepInst<"texref",     int_nvvm_istypep_texture>;

//===- Surface Stores -----------------------------------------------------===//

// Helper classes for the 1d / a1d unformatted stores below.  `ty` is
// "<type>.<mode>" (e.g. "b8.clamp") and `rc` is the register class of every
// stored value.  No result registers, empty pattern list.

// 1d, one value: [$s, {$x}], {$r}
class SustB1D<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r),
              !strconcat("sust.b.1d.", ty, " \t[$s, \\{$x\\}], \\{$r\\};"),
              []>;
// 1d, two values: [$s, {$x}], {$r, $g}
class SustB1DV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat("sust.b.1d.v2.", ty,
                         " \t[$s, \\{$x\\}], \\{$r, $g\\};"),
              []>;
// 1d, four values: [$s, {$x}], {$r, $g, $b, $a}
class SustB1DV4<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
              !strconcat("sust.b.1d.v4.", ty,
                         " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};"),
              []>;
// a1d, one value: [$s, {$idx, $x}], {$r}
class SustBA1D<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r),
              !strconcat("sust.b.a1d.", ty,
                         " \t[$s, \\{$idx, $x\\}], \\{$r\\};"),
              []>;
// a1d, two values: [$s, {$idx, $x}], {$r, $g}
class SustBA1DV2<string ty, RegisterClass rc>
  : NVPTXInst<(outs),
              (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, rc:$r, rc:$g),
              !strconcat("sust.b.a1d.v2.", ty,
                         " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};"),
              []>;

let IsSust = 1 in {
// Unformatted
// .clamp variant
def SUST_B_1D_B8_CLAMP          : SustB1D<"b8.clamp",  Int16Regs>;
def SUST_B_1D_B16_CLAMP         : SustB1D<"b16.clamp", Int16Regs>;
def SUST_B_1D_B32_CLAMP         : SustB1D<"b32.clamp", Int32Regs>;
def SUST_B_1D_B64_CLAMP         : SustB1D<"b64.clamp", Int64Regs>;
def SUST_B_1D_V2B8_CLAMP        : SustB1DV2<"b8.clamp",  Int16Regs>;
def SUST_B_1D_V2B16_CLAMP       : SustB1DV2<"b16.clamp", Int16Regs>;
def SUST_B_1D_V2B32_CLAMP       : SustB1DV2<"b32.clamp", Int32Regs>;
def SUST_B_1D_V2B64_CLAMP       : SustB1DV2<"b64.clamp", Int64Regs>;
def SUST_B_1D_V4B8_CLAMP        : SustB1DV4<"b8.clamp",  Int16Regs>;
def SUST_B_1D_V4B16_CLAMP       : SustB1DV4<"b16.clamp", Int16Regs>;
def SUST_B_1D_V4B32_CLAMP       : SustB1DV4<"b32.clamp", Int32Regs>;


def SUST_B_1D_ARRAY_B8_CLAMP    : SustBA1D<"b8.clamp",  Int16Regs>;
def SUST_B_1D_ARRAY_B16_CLAMP   : SustBA1D<"b16.clamp", Int16Regs>;
def SUST_B_1D_ARRAY_B32_CLAMP   : SustBA1D<"b32.clamp", Int32Regs>;
def SUST_B_1D_ARRAY_B64_CLAMP   : SustBA1D<"b64.clamp", Int64Regs>;
def SUST_B_1D_ARRAY_V2B8_CLAMP  : SustBA1DV2<"b8.clamp",  Int16Regs>;
def SUST_B_1D_ARRAY_V2B16_CLAMP : SustBA1DV2<"b16.clamp", Int16Regs>;
def SUST_B_1D_ARRAY_V2B32_CLAMP : SustBA1DV2<"b32.clamp", Int32Regs>;
// The next def continues past the end of this span; its visible head is kept
// token-for-token.
def SUST_B_1D_ARRAY_V2B64_CLAMP
  : NVPTXInst<(outs),
(ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 4823 Int64Regs:$g), 4824 "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 4825 []>; 4826def SUST_B_1D_ARRAY_V4B8_CLAMP 4827 : NVPTXInst<(outs), 4828 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4829 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4830 "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], " 4831 "\\{$r, $g, $b, $a\\};", 4832 []>; 4833def SUST_B_1D_ARRAY_V4B16_CLAMP 4834 : NVPTXInst<(outs), 4835 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 4836 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4837 "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], " 4838 "\\{$r, $g, $b, $a\\};", 4839 []>; 4840def SUST_B_1D_ARRAY_V4B32_CLAMP 4841 : NVPTXInst<(outs), 4842 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 4843 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4844 "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], " 4845 "\\{$r, $g, $b, $a\\};", 4846 []>; 4847 4848 4849def SUST_B_2D_B8_CLAMP 4850 : NVPTXInst<(outs), 4851 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4852 "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4853 []>; 4854def SUST_B_2D_B16_CLAMP 4855 : NVPTXInst<(outs), 4856 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 4857 "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4858 []>; 4859def SUST_B_2D_B32_CLAMP 4860 : NVPTXInst<(outs), 4861 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 4862 "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4863 []>; 4864def SUST_B_2D_B64_CLAMP 4865 : NVPTXInst<(outs), 4866 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 4867 "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};", 4868 []>; 4869def SUST_B_2D_V2B8_CLAMP 4870 : NVPTXInst<(outs), 4871 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4872 Int16Regs:$g), 4873 "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4874 []>; 4875def SUST_B_2D_V2B16_CLAMP 4876 
: NVPTXInst<(outs), 4877 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4878 Int16Regs:$g), 4879 "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4880 []>; 4881def SUST_B_2D_V2B32_CLAMP 4882 : NVPTXInst<(outs), 4883 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4884 Int32Regs:$g), 4885 "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4886 []>; 4887def SUST_B_2D_V2B64_CLAMP 4888 : NVPTXInst<(outs), 4889 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 4890 Int64Regs:$g), 4891 "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 4892 []>; 4893def SUST_B_2D_V4B8_CLAMP 4894 : NVPTXInst<(outs), 4895 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4896 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4897 "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], " 4898 "\\{$r, $g, $b, $a\\};", 4899 []>; 4900def SUST_B_2D_V4B16_CLAMP 4901 : NVPTXInst<(outs), 4902 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 4903 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4904 "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], " 4905 "\\{$r, $g, $b, $a\\};", 4906 []>; 4907def SUST_B_2D_V4B32_CLAMP 4908 : NVPTXInst<(outs), 4909 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 4910 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4911 "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], " 4912 "\\{$r, $g, $b, $a\\};", 4913 []>; 4914 4915 4916def SUST_B_2D_ARRAY_B8_CLAMP 4917 : NVPTXInst<(outs), 4918 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4919 Int16Regs:$r), 4920 "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4921 []>; 4922def SUST_B_2D_ARRAY_B16_CLAMP 4923 : NVPTXInst<(outs), 4924 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4925 Int16Regs:$r), 4926 "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4927 []>; 4928def SUST_B_2D_ARRAY_B32_CLAMP 4929 : NVPTXInst<(outs), 4930 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4931 
Int32Regs:$r), 4932 "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4933 []>; 4934def SUST_B_2D_ARRAY_B64_CLAMP 4935 : NVPTXInst<(outs), 4936 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4937 Int64Regs:$r), 4938 "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 4939 []>; 4940def SUST_B_2D_ARRAY_V2B8_CLAMP 4941 : NVPTXInst<(outs), 4942 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4943 Int16Regs:$r, Int16Regs:$g), 4944 "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4945 "\\{$r, $g\\};", 4946 []>; 4947def SUST_B_2D_ARRAY_V2B16_CLAMP 4948 : NVPTXInst<(outs), 4949 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4950 Int16Regs:$r, Int16Regs:$g), 4951 "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4952 "\\{$r, $g\\};", 4953 []>; 4954def SUST_B_2D_ARRAY_V2B32_CLAMP 4955 : NVPTXInst<(outs), 4956 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4957 Int32Regs:$r, Int32Regs:$g), 4958 "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4959 "\\{$r, $g\\};", 4960 []>; 4961def SUST_B_2D_ARRAY_V2B64_CLAMP 4962 : NVPTXInst<(outs), 4963 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4964 Int64Regs:$r, Int64Regs:$g), 4965 "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4966 "\\{$r, $g\\};", 4967 []>; 4968def SUST_B_2D_ARRAY_V4B8_CLAMP 4969 : NVPTXInst<(outs), 4970 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4971 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4972 "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4973 "\\{$r, $g, $b, $a\\};", 4974 []>; 4975def SUST_B_2D_ARRAY_V4B16_CLAMP 4976 : NVPTXInst<(outs), 4977 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4978 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 4979 "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4980 "\\{$r, $g, $b, $a\\};", 4981 []>; 4982def SUST_B_2D_ARRAY_V4B32_CLAMP 4983 : 
NVPTXInst<(outs), 4984 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 4985 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 4986 "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], " 4987 "\\{$r, $g, $b, $a\\};", 4988 []>; 4989 4990 4991def SUST_B_3D_B8_CLAMP 4992 : NVPTXInst<(outs), 4993 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 4994 Int16Regs:$r), 4995 "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 4996 []>; 4997def SUST_B_3D_B16_CLAMP 4998 : NVPTXInst<(outs), 4999 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5000 Int16Regs:$r), 5001 "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5002 []>; 5003def SUST_B_3D_B32_CLAMP 5004 : NVPTXInst<(outs), 5005 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5006 Int32Regs:$r), 5007 "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5008 []>; 5009def SUST_B_3D_B64_CLAMP 5010 : NVPTXInst<(outs), 5011 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5012 Int64Regs:$r), 5013 "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5014 []>; 5015def SUST_B_3D_V2B8_CLAMP 5016 : NVPTXInst<(outs), 5017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5018 Int16Regs:$r, Int16Regs:$g), 5019 "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5020 "\\{$r, $g\\};", 5021 []>; 5022def SUST_B_3D_V2B16_CLAMP 5023 : NVPTXInst<(outs), 5024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5025 Int16Regs:$r, Int16Regs:$g), 5026 "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5027 "\\{$r, $g\\};", 5028 []>; 5029def SUST_B_3D_V2B32_CLAMP 5030 : NVPTXInst<(outs), 5031 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5032 Int32Regs:$r, Int32Regs:$g), 5033 "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5034 "\\{$r, $g\\};", 5035 []>; 5036def SUST_B_3D_V2B64_CLAMP 5037 : NVPTXInst<(outs), 5038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5039 
Int64Regs:$r, Int64Regs:$g), 5040 "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5041 "\\{$r, $g\\};", 5042 []>; 5043def SUST_B_3D_V4B8_CLAMP 5044 : NVPTXInst<(outs), 5045 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5046 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5047 "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5048 "\\{$r, $g, $b, $a\\};", 5049 []>; 5050def SUST_B_3D_V4B16_CLAMP 5051 : NVPTXInst<(outs), 5052 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5053 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5054 "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5055 "\\{$r, $g, $b, $a\\};", 5056 []>; 5057def SUST_B_3D_V4B32_CLAMP 5058 : NVPTXInst<(outs), 5059 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5060 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5061 "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], " 5062 "\\{$r, $g, $b, $a\\};", 5063 []>; 5064 5065 5066// .trap variant 5067def SUST_B_1D_B8_TRAP 5068 : NVPTXInst<(outs), 5069 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5070 "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5071 []>; 5072def SUST_B_1D_B16_TRAP 5073 : NVPTXInst<(outs), 5074 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5075 "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5076 []>; 5077def SUST_B_1D_B32_TRAP 5078 : NVPTXInst<(outs), 5079 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5080 "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5081 []>; 5082def SUST_B_1D_B64_TRAP 5083 : NVPTXInst<(outs), 5084 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5085 "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};", 5086 []>; 5087def SUST_B_1D_V2B8_TRAP 5088 : NVPTXInst<(outs), 5089 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5090 "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5091 []>; 5092def SUST_B_1D_V2B16_TRAP 5093 : NVPTXInst<(outs), 5094 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5095 
"sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5096 []>; 5097def SUST_B_1D_V2B32_TRAP 5098 : NVPTXInst<(outs), 5099 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5100 "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5101 []>; 5102def SUST_B_1D_V2B64_TRAP 5103 : NVPTXInst<(outs), 5104 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5105 "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5106 []>; 5107def SUST_B_1D_V4B8_TRAP 5108 : NVPTXInst<(outs), 5109 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5110 Int16Regs:$b, Int16Regs:$a), 5111 "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5112 []>; 5113def SUST_B_1D_V4B16_TRAP 5114 : NVPTXInst<(outs), 5115 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5116 Int16Regs:$b, Int16Regs:$a), 5117 "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5118 []>; 5119def SUST_B_1D_V4B32_TRAP 5120 : NVPTXInst<(outs), 5121 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5122 Int32Regs:$b, Int32Regs:$a), 5123 "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5124 []>; 5125 5126 5127def SUST_B_1D_ARRAY_B8_TRAP 5128 : NVPTXInst<(outs), 5129 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5130 "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5131 []>; 5132def SUST_B_1D_ARRAY_B16_TRAP 5133 : NVPTXInst<(outs), 5134 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5135 "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5136 []>; 5137def SUST_B_1D_ARRAY_B32_TRAP 5138 : NVPTXInst<(outs), 5139 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5140 "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5141 []>; 5142def SUST_B_1D_ARRAY_B64_TRAP 5143 : NVPTXInst<(outs), 5144 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5145 "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5146 []>; 5147def SUST_B_1D_ARRAY_V2B8_TRAP 5148 : 
NVPTXInst<(outs), 5149 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5150 Int16Regs:$g), 5151 "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5152 []>; 5153def SUST_B_1D_ARRAY_V2B16_TRAP 5154 : NVPTXInst<(outs), 5155 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5156 Int16Regs:$g), 5157 "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5158 []>; 5159def SUST_B_1D_ARRAY_V2B32_TRAP 5160 : NVPTXInst<(outs), 5161 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5162 Int32Regs:$g), 5163 "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5164 []>; 5165def SUST_B_1D_ARRAY_V2B64_TRAP 5166 : NVPTXInst<(outs), 5167 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5168 Int64Regs:$g), 5169 "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5170 []>; 5171def SUST_B_1D_ARRAY_V4B8_TRAP 5172 : NVPTXInst<(outs), 5173 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5174 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5175 "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5176 "\\{$r, $g, $b, $a\\};", 5177 []>; 5178def SUST_B_1D_ARRAY_V4B16_TRAP 5179 : NVPTXInst<(outs), 5180 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5181 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5182 "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5183 "\\{$r, $g, $b, $a\\};", 5184 []>; 5185def SUST_B_1D_ARRAY_V4B32_TRAP 5186 : NVPTXInst<(outs), 5187 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5188 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5189 "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5190 "\\{$r, $g, $b, $a\\};", 5191 []>; 5192 5193 5194def SUST_B_2D_B8_TRAP 5195 : NVPTXInst<(outs), 5196 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5197 "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5198 []>; 5199def SUST_B_2D_B16_TRAP 5200 : NVPTXInst<(outs), 5201 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5202 
"sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5203 []>; 5204def SUST_B_2D_B32_TRAP 5205 : NVPTXInst<(outs), 5206 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5207 "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5208 []>; 5209def SUST_B_2D_B64_TRAP 5210 : NVPTXInst<(outs), 5211 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5212 "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5213 []>; 5214def SUST_B_2D_V2B8_TRAP 5215 : NVPTXInst<(outs), 5216 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5217 Int16Regs:$g), 5218 "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5219 []>; 5220def SUST_B_2D_V2B16_TRAP 5221 : NVPTXInst<(outs), 5222 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5223 Int16Regs:$g), 5224 "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5225 []>; 5226def SUST_B_2D_V2B32_TRAP 5227 : NVPTXInst<(outs), 5228 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5229 Int32Regs:$g), 5230 "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5231 []>; 5232def SUST_B_2D_V2B64_TRAP 5233 : NVPTXInst<(outs), 5234 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5235 Int64Regs:$g), 5236 "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5237 []>; 5238def SUST_B_2D_V4B8_TRAP 5239 : NVPTXInst<(outs), 5240 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5241 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5242 "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5243 "\\{$r, $g, $b, $a\\};", 5244 []>; 5245def SUST_B_2D_V4B16_TRAP 5246 : NVPTXInst<(outs), 5247 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5248 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5249 "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5250 "\\{$r, $g, $b, $a\\};", 5251 []>; 5252def SUST_B_2D_V4B32_TRAP 5253 : NVPTXInst<(outs), 5254 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5255 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5256 
"sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5257 "\\{$r, $g, $b, $a\\};", 5258 []>; 5259 5260 5261def SUST_B_2D_ARRAY_B8_TRAP 5262 : NVPTXInst<(outs), 5263 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5264 Int16Regs:$r), 5265 "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5266 []>; 5267def SUST_B_2D_ARRAY_B16_TRAP 5268 : NVPTXInst<(outs), 5269 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5270 Int16Regs:$r), 5271 "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5272 []>; 5273def SUST_B_2D_ARRAY_B32_TRAP 5274 : NVPTXInst<(outs), 5275 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5276 Int32Regs:$r), 5277 "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5278 []>; 5279def SUST_B_2D_ARRAY_B64_TRAP 5280 : NVPTXInst<(outs), 5281 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5282 Int64Regs:$r), 5283 "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5284 []>; 5285def SUST_B_2D_ARRAY_V2B8_TRAP 5286 : NVPTXInst<(outs), 5287 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5288 Int16Regs:$r, Int16Regs:$g), 5289 "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5290 "\\{$r, $g\\};", 5291 []>; 5292def SUST_B_2D_ARRAY_V2B16_TRAP 5293 : NVPTXInst<(outs), 5294 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5295 Int16Regs:$r, Int16Regs:$g), 5296 "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5297 "\\{$r, $g\\};", 5298 []>; 5299def SUST_B_2D_ARRAY_V2B32_TRAP 5300 : NVPTXInst<(outs), 5301 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5302 Int32Regs:$r, Int32Regs:$g), 5303 "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5304 "\\{$r, $g\\};", 5305 []>; 5306def SUST_B_2D_ARRAY_V2B64_TRAP 5307 : NVPTXInst<(outs), 5308 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5309 Int64Regs:$r, Int64Regs:$g), 5310 "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5311 
"\\{$r, $g\\};", 5312 []>; 5313def SUST_B_2D_ARRAY_V4B8_TRAP 5314 : NVPTXInst<(outs), 5315 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5316 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5317 "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5318 "\\{$r, $g, $b, $a\\};", 5319 []>; 5320def SUST_B_2D_ARRAY_V4B16_TRAP 5321 : NVPTXInst<(outs), 5322 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5323 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5324 "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5325 "\\{$r, $g, $b, $a\\};", 5326 []>; 5327def SUST_B_2D_ARRAY_V4B32_TRAP 5328 : NVPTXInst<(outs), 5329 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5330 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5331 "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5332 "\\{$r, $g, $b, $a\\};", 5333 []>; 5334 5335 5336def SUST_B_3D_B8_TRAP 5337 : NVPTXInst<(outs), 5338 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5339 Int16Regs:$r), 5340 "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5341 []>; 5342def SUST_B_3D_B16_TRAP 5343 : NVPTXInst<(outs), 5344 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5345 Int16Regs:$r), 5346 "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5347 []>; 5348def SUST_B_3D_B32_TRAP 5349 : NVPTXInst<(outs), 5350 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5351 Int32Regs:$r), 5352 "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5353 []>; 5354def SUST_B_3D_B64_TRAP 5355 : NVPTXInst<(outs), 5356 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5357 Int64Regs:$r), 5358 "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5359 []>; 5360def SUST_B_3D_V2B8_TRAP 5361 : NVPTXInst<(outs), 5362 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5363 Int16Regs:$r, Int16Regs:$g), 5364 "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5365 "\\{$r, 
$g\\};", 5366 []>; 5367def SUST_B_3D_V2B16_TRAP 5368 : NVPTXInst<(outs), 5369 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5370 Int16Regs:$r, Int16Regs:$g), 5371 "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5372 "\\{$r, $g\\};", 5373 []>; 5374def SUST_B_3D_V2B32_TRAP 5375 : NVPTXInst<(outs), 5376 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5377 Int32Regs:$r, Int32Regs:$g), 5378 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5379 "\\{$r, $g\\};", 5380 []>; 5381def SUST_B_3D_V2B64_TRAP 5382 : NVPTXInst<(outs), 5383 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5384 Int64Regs:$r, Int64Regs:$g), 5385 "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5386 "\\{$r, $g\\};", 5387 []>; 5388def SUST_B_3D_V4B8_TRAP 5389 : NVPTXInst<(outs), 5390 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5391 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5392 "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5393 "\\{$r, $g, $b, $a\\};", 5394 []>; 5395def SUST_B_3D_V4B16_TRAP 5396 : NVPTXInst<(outs), 5397 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5398 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5399 "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5400 "\\{$r, $g, $b, $a\\};", 5401 []>; 5402def SUST_B_3D_V4B32_TRAP 5403 : NVPTXInst<(outs), 5404 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5405 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5406 "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 5407 "\\{$r, $g, $b, $a\\};", 5408 []>; 5409 5410 5411// .zero variant 5412def SUST_B_1D_B8_ZERO 5413 : NVPTXInst<(outs), 5414 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5415 "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};", 5416 []>; 5417def SUST_B_1D_B16_ZERO 5418 : NVPTXInst<(outs), 5419 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5420 "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};", 5421 []>; 5422def SUST_B_1D_B32_ZERO 
5423 : NVPTXInst<(outs), 5424 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5425 "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};", 5426 []>; 5427def SUST_B_1D_B64_ZERO 5428 : NVPTXInst<(outs), 5429 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 5430 "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};", 5431 []>; 5432def SUST_B_1D_V2B8_ZERO 5433 : NVPTXInst<(outs), 5434 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5435 "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5436 []>; 5437def SUST_B_1D_V2B16_ZERO 5438 : NVPTXInst<(outs), 5439 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5440 "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5441 []>; 5442def SUST_B_1D_V2B32_ZERO 5443 : NVPTXInst<(outs), 5444 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5445 "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5446 []>; 5447def SUST_B_1D_V2B64_ZERO 5448 : NVPTXInst<(outs), 5449 (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 5450 "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};", 5451 []>; 5452def SUST_B_1D_V4B8_ZERO 5453 : NVPTXInst<(outs), 5454 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5455 Int16Regs:$b, Int16Regs:$a), 5456 "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5457 []>; 5458def SUST_B_1D_V4B16_ZERO 5459 : NVPTXInst<(outs), 5460 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5461 Int16Regs:$b, Int16Regs:$a), 5462 "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5463 []>; 5464def SUST_B_1D_V4B32_ZERO 5465 : NVPTXInst<(outs), 5466 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5467 Int32Regs:$b, Int32Regs:$a), 5468 "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5469 []>; 5470 5471 5472def SUST_B_1D_ARRAY_B8_ZERO 5473 : NVPTXInst<(outs), 5474 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5475 "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5476 []>; 5477def 
SUST_B_1D_ARRAY_B16_ZERO 5478 : NVPTXInst<(outs), 5479 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5480 "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5481 []>; 5482def SUST_B_1D_ARRAY_B32_ZERO 5483 : NVPTXInst<(outs), 5484 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5485 "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5486 []>; 5487def SUST_B_1D_ARRAY_B64_ZERO 5488 : NVPTXInst<(outs), 5489 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r), 5490 "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5491 []>; 5492def SUST_B_1D_ARRAY_V2B8_ZERO 5493 : NVPTXInst<(outs), 5494 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5495 Int16Regs:$g), 5496 "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5497 []>; 5498def SUST_B_1D_ARRAY_V2B16_ZERO 5499 : NVPTXInst<(outs), 5500 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5501 Int16Regs:$g), 5502 "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5503 []>; 5504def SUST_B_1D_ARRAY_V2B32_ZERO 5505 : NVPTXInst<(outs), 5506 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5507 Int32Regs:$g), 5508 "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5509 []>; 5510def SUST_B_1D_ARRAY_V2B64_ZERO 5511 : NVPTXInst<(outs), 5512 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r, 5513 Int64Regs:$g), 5514 "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5515 []>; 5516def SUST_B_1D_ARRAY_V4B8_ZERO 5517 : NVPTXInst<(outs), 5518 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5519 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5520 "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], " 5521 "\\{$r, $g, $b, $a\\};", 5522 []>; 5523def SUST_B_1D_ARRAY_V4B16_ZERO 5524 : NVPTXInst<(outs), 5525 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5526 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5527 "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], " 5528 
"\\{$r, $g, $b, $a\\};", 5529 []>; 5530def SUST_B_1D_ARRAY_V4B32_ZERO 5531 : NVPTXInst<(outs), 5532 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5533 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5534 "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], " 5535 "\\{$r, $g, $b, $a\\};", 5536 []>; 5537 5538 5539def SUST_B_2D_B8_ZERO 5540 : NVPTXInst<(outs), 5541 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5542 "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5543 []>; 5544def SUST_B_2D_B16_ZERO 5545 : NVPTXInst<(outs), 5546 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5547 "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5548 []>; 5549def SUST_B_2D_B32_ZERO 5550 : NVPTXInst<(outs), 5551 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5552 "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5553 []>; 5554def SUST_B_2D_B64_ZERO 5555 : NVPTXInst<(outs), 5556 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 5557 "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};", 5558 []>; 5559def SUST_B_2D_V2B8_ZERO 5560 : NVPTXInst<(outs), 5561 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5562 Int16Regs:$g), 5563 "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5564 []>; 5565def SUST_B_2D_V2B16_ZERO 5566 : NVPTXInst<(outs), 5567 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5568 Int16Regs:$g), 5569 "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5570 []>; 5571def SUST_B_2D_V2B32_ZERO 5572 : NVPTXInst<(outs), 5573 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5574 Int32Regs:$g), 5575 "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5576 []>; 5577def SUST_B_2D_V2B64_ZERO 5578 : NVPTXInst<(outs), 5579 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 5580 Int64Regs:$g), 5581 "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5582 []>; 5583def SUST_B_2D_V4B8_ZERO 5584 : NVPTXInst<(outs), 5585 (ins 
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5586 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5587 "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], " 5588 "\\{$r, $g, $b, $a\\};", 5589 []>; 5590def SUST_B_2D_V4B16_ZERO 5591 : NVPTXInst<(outs), 5592 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5593 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5594 "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], " 5595 "\\{$r, $g, $b, $a\\};", 5596 []>; 5597def SUST_B_2D_V4B32_ZERO 5598 : NVPTXInst<(outs), 5599 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5600 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5601 "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], " 5602 "\\{$r, $g, $b, $a\\};", 5603 []>; 5604 5605 5606def SUST_B_2D_ARRAY_B8_ZERO 5607 : NVPTXInst<(outs), 5608 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5609 Int16Regs:$r), 5610 "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5611 []>; 5612def SUST_B_2D_ARRAY_B16_ZERO 5613 : NVPTXInst<(outs), 5614 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5615 Int16Regs:$r), 5616 "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5617 []>; 5618def SUST_B_2D_ARRAY_B32_ZERO 5619 : NVPTXInst<(outs), 5620 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5621 Int32Regs:$r), 5622 "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5623 []>; 5624def SUST_B_2D_ARRAY_B64_ZERO 5625 : NVPTXInst<(outs), 5626 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5627 Int64Regs:$r), 5628 "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5629 []>; 5630def SUST_B_2D_ARRAY_V2B8_ZERO 5631 : NVPTXInst<(outs), 5632 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5633 Int16Regs:$r, Int16Regs:$g), 5634 "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5635 "\\{$r, $g\\};", 5636 []>; 5637def SUST_B_2D_ARRAY_V2B16_ZERO 5638 : NVPTXInst<(outs), 5639 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, 
Int32Regs:$y, 5640 Int16Regs:$r, Int16Regs:$g), 5641 "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5642 "\\{$r, $g\\};", 5643 []>; 5644def SUST_B_2D_ARRAY_V2B32_ZERO 5645 : NVPTXInst<(outs), 5646 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5647 Int32Regs:$r, Int32Regs:$g), 5648 "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5649 "\\{$r, $g\\};", 5650 []>; 5651def SUST_B_2D_ARRAY_V2B64_ZERO 5652 : NVPTXInst<(outs), 5653 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5654 Int64Regs:$r, Int64Regs:$g), 5655 "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5656 "\\{$r, $g\\};", 5657 []>; 5658def SUST_B_2D_ARRAY_V4B8_ZERO 5659 : NVPTXInst<(outs), 5660 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5661 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5662 "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5663 "\\{$r, $g, $b, $a\\};", 5664 []>; 5665def SUST_B_2D_ARRAY_V4B16_ZERO 5666 : NVPTXInst<(outs), 5667 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5668 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5669 "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5670 "\\{$r, $g, $b, $a\\};", 5671 []>; 5672def SUST_B_2D_ARRAY_V4B32_ZERO 5673 : NVPTXInst<(outs), 5674 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5675 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5676 "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], " 5677 "\\{$r, $g, $b, $a\\};", 5678 []>; 5679 5680 5681def SUST_B_3D_B8_ZERO 5682 : NVPTXInst<(outs), 5683 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5684 Int16Regs:$r), 5685 "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5686 []>; 5687def SUST_B_3D_B16_ZERO 5688 : NVPTXInst<(outs), 5689 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5690 Int16Regs:$r), 5691 "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5692 []>; 5693def SUST_B_3D_B32_ZERO 
5694 : NVPTXInst<(outs), 5695 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5696 Int32Regs:$r), 5697 "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5698 []>; 5699def SUST_B_3D_B64_ZERO 5700 : NVPTXInst<(outs), 5701 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5702 Int64Regs:$r), 5703 "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5704 []>; 5705def SUST_B_3D_V2B8_ZERO 5706 : NVPTXInst<(outs), 5707 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5708 Int16Regs:$r, Int16Regs:$g), 5709 "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5710 "\\{$r, $g\\};", 5711 []>; 5712def SUST_B_3D_V2B16_ZERO 5713 : NVPTXInst<(outs), 5714 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5715 Int16Regs:$r, Int16Regs:$g), 5716 "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5717 "\\{$r, $g\\};", 5718 []>; 5719def SUST_B_3D_V2B32_ZERO 5720 : NVPTXInst<(outs), 5721 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5722 Int32Regs:$r, Int32Regs:$g), 5723 "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5724 "\\{$r, $g\\};", 5725 []>; 5726def SUST_B_3D_V2B64_ZERO 5727 : NVPTXInst<(outs), 5728 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5729 Int64Regs:$r, Int64Regs:$g), 5730 "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5731 "\\{$r, $g\\};", 5732 []>; 5733def SUST_B_3D_V4B8_ZERO 5734 : NVPTXInst<(outs), 5735 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5736 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5737 "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5738 "\\{$r, $g, $b, $a\\};", 5739 []>; 5740def SUST_B_3D_V4B16_ZERO 5741 : NVPTXInst<(outs), 5742 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5743 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5744 "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5745 "\\{$r, $g, $b, $a\\};", 5746 []>; 5747def SUST_B_3D_V4B32_ZERO 5748 : 
NVPTXInst<(outs), 5749 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5750 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5751 "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], " 5752 "\\{$r, $g, $b, $a\\};", 5753 []>; 5754 5755 5756 5757// Formatted 5758 5759def SUST_P_1D_B8_TRAP 5760 : NVPTXInst<(outs), 5761 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5762 "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", 5763 []>; 5764def SUST_P_1D_B16_TRAP 5765 : NVPTXInst<(outs), 5766 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 5767 "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", 5768 []>; 5769def SUST_P_1D_B32_TRAP 5770 : NVPTXInst<(outs), 5771 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 5772 "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", 5773 []>; 5774def SUST_P_1D_V2B8_TRAP 5775 : NVPTXInst<(outs), 5776 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5777 "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5778 []>; 5779def SUST_P_1D_V2B16_TRAP 5780 : NVPTXInst<(outs), 5781 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 5782 "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5783 []>; 5784def SUST_P_1D_V2B32_TRAP 5785 : NVPTXInst<(outs), 5786 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 5787 "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", 5788 []>; 5789def SUST_P_1D_V4B8_TRAP 5790 : NVPTXInst<(outs), 5791 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5792 Int16Regs:$b, Int16Regs:$a), 5793 "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5794 []>; 5795def SUST_P_1D_V4B16_TRAP 5796 : NVPTXInst<(outs), 5797 (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, 5798 Int16Regs:$b, Int16Regs:$a), 5799 "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5800 []>; 5801def SUST_P_1D_V4B32_TRAP 5802 : NVPTXInst<(outs), 5803 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, 5804 Int32Regs:$b, Int32Regs:$a), 5805 
"sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5806 []>; 5807 5808 5809def SUST_P_1D_ARRAY_B8_TRAP 5810 : NVPTXInst<(outs), 5811 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5812 "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5813 []>; 5814def SUST_P_1D_ARRAY_B16_TRAP 5815 : NVPTXInst<(outs), 5816 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), 5817 "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5818 []>; 5819def SUST_P_1D_ARRAY_B32_TRAP 5820 : NVPTXInst<(outs), 5821 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), 5822 "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5823 []>; 5824def SUST_P_1D_ARRAY_V2B8_TRAP 5825 : NVPTXInst<(outs), 5826 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5827 Int16Regs:$g), 5828 "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5829 []>; 5830def SUST_P_1D_ARRAY_V2B16_TRAP 5831 : NVPTXInst<(outs), 5832 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5833 Int16Regs:$g), 5834 "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5835 []>; 5836def SUST_P_1D_ARRAY_V2B32_TRAP 5837 : NVPTXInst<(outs), 5838 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, 5839 Int32Regs:$g), 5840 "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5841 []>; 5842def SUST_P_1D_ARRAY_V4B8_TRAP 5843 : NVPTXInst<(outs), 5844 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5845 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5846 "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " 5847 "\\{$r, $g, $b, $a\\};", 5848 []>; 5849def SUST_P_1D_ARRAY_V4B16_TRAP 5850 : NVPTXInst<(outs), 5851 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, 5852 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5853 "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " 5854 "\\{$r, $g, $b, $a\\};", 5855 []>; 5856def SUST_P_1D_ARRAY_V4B32_TRAP 5857 : NVPTXInst<(outs), 5858 (ins Int64Regs:$s, Int32Regs:$idx, 
Int32Regs:$x, Int32Regs:$r, 5859 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5860 "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " 5861 "\\{$r, $g, $b, $a\\};", 5862 []>; 5863 5864 5865def SUST_P_2D_B8_TRAP 5866 : NVPTXInst<(outs), 5867 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5868 "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5869 []>; 5870def SUST_P_2D_B16_TRAP 5871 : NVPTXInst<(outs), 5872 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 5873 "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5874 []>; 5875def SUST_P_2D_B32_TRAP 5876 : NVPTXInst<(outs), 5877 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 5878 "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", 5879 []>; 5880def SUST_P_2D_V2B8_TRAP 5881 : NVPTXInst<(outs), 5882 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5883 Int16Regs:$g), 5884 "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5885 []>; 5886def SUST_P_2D_V2B16_TRAP 5887 : NVPTXInst<(outs), 5888 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5889 Int16Regs:$g), 5890 "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5891 []>; 5892def SUST_P_2D_V2B32_TRAP 5893 : NVPTXInst<(outs), 5894 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 5895 Int32Regs:$g), 5896 "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5897 []>; 5898def SUST_P_2D_V4B8_TRAP 5899 : NVPTXInst<(outs), 5900 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5901 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5902 "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " 5903 "\\{$r, $g, $b, $a\\};", 5904 []>; 5905def SUST_P_2D_V4B16_TRAP 5906 : NVPTXInst<(outs), 5907 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, 5908 Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5909 "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " 5910 "\\{$r, $g, $b, $a\\};", 5911 []>; 5912def SUST_P_2D_V4B32_TRAP 5913 : NVPTXInst<(outs), 5914 (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int32Regs:$r, 5915 Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5916 "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " 5917 "\\{$r, $g, $b, $a\\};", 5918 []>; 5919 5920 5921def SUST_P_2D_ARRAY_B8_TRAP 5922 : NVPTXInst<(outs), 5923 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5924 Int16Regs:$r), 5925 "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5926 []>; 5927def SUST_P_2D_ARRAY_B16_TRAP 5928 : NVPTXInst<(outs), 5929 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5930 Int16Regs:$r), 5931 "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5932 []>; 5933def SUST_P_2D_ARRAY_B32_TRAP 5934 : NVPTXInst<(outs), 5935 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5936 Int32Regs:$r), 5937 "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5938 []>; 5939def SUST_P_2D_ARRAY_V2B8_TRAP 5940 : NVPTXInst<(outs), 5941 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5942 Int16Regs:$r, Int16Regs:$g), 5943 "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5944 "\\{$r, $g\\};", 5945 []>; 5946def SUST_P_2D_ARRAY_V2B16_TRAP 5947 : NVPTXInst<(outs), 5948 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5949 Int16Regs:$r, Int16Regs:$g), 5950 "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5951 "\\{$r, $g\\};", 5952 []>; 5953def SUST_P_2D_ARRAY_V2B32_TRAP 5954 : NVPTXInst<(outs), 5955 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5956 Int32Regs:$r, Int32Regs:$g), 5957 "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5958 "\\{$r, $g\\};", 5959 []>; 5960def SUST_P_2D_ARRAY_V4B8_TRAP 5961 : NVPTXInst<(outs), 5962 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5963 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5964 "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5965 "\\{$r, $g, $b, $a\\};", 5966 []>; 5967def SUST_P_2D_ARRAY_V4B16_TRAP 5968 : NVPTXInst<(outs), 5969 (ins Int64Regs:$s, 
Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5970 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 5971 "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5972 "\\{$r, $g, $b, $a\\};", 5973 []>; 5974def SUST_P_2D_ARRAY_V4B32_TRAP 5975 : NVPTXInst<(outs), 5976 (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5977 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 5978 "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " 5979 "\\{$r, $g, $b, $a\\};", 5980 []>; 5981 5982 5983def SUST_P_3D_B8_TRAP 5984 : NVPTXInst<(outs), 5985 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5986 Int16Regs:$r), 5987 "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5988 []>; 5989def SUST_P_3D_B16_TRAP 5990 : NVPTXInst<(outs), 5991 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5992 Int16Regs:$r), 5993 "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 5994 []>; 5995def SUST_P_3D_B32_TRAP 5996 : NVPTXInst<(outs), 5997 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 5998 Int32Regs:$r), 5999 "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", 6000 []>; 6001def SUST_P_3D_V2B8_TRAP 6002 : NVPTXInst<(outs), 6003 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6004 Int16Regs:$r, Int16Regs:$g), 6005 "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6006 "\\{$r, $g\\};", 6007 []>; 6008def SUST_P_3D_V2B16_TRAP 6009 : NVPTXInst<(outs), 6010 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6011 Int16Regs:$r, Int16Regs:$g), 6012 "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6013 "\\{$r, $g\\};", 6014 []>; 6015def SUST_P_3D_V2B32_TRAP 6016 : NVPTXInst<(outs), 6017 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6018 Int32Regs:$r, Int32Regs:$g), 6019 "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6020 "\\{$r, $g\\};", 6021 []>; 6022def SUST_P_3D_V4B8_TRAP 6023 : NVPTXInst<(outs), 6024 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z, 6025 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6026 "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6027 "\\{$r, $g, $b, $a\\};", 6028 []>; 6029def SUST_P_3D_V4B16_TRAP 6030 : NVPTXInst<(outs), 6031 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6032 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6033 "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6034 "\\{$r, $g, $b, $a\\};", 6035 []>; 6036def SUST_P_3D_V4B32_TRAP 6037 : NVPTXInst<(outs), 6038 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6039 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6040 "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " 6041 "\\{$r, $g, $b, $a\\};", 6042 []>; 6043} 6044 6045// Surface store instruction patterns 6046// I'm not sure why we can't just include these in the instruction definitions, 6047// but TableGen complains of type errors :( 6048 6049// .clamp variant 6050def : Pat<(int_nvvm_sust_b_1d_i8_clamp 6051 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6052 (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6053 6054def : Pat<(int_nvvm_sust_b_1d_i16_clamp 6055 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6056 (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6057 6058def : Pat<(int_nvvm_sust_b_1d_i32_clamp 6059 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6060 (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6061 6062def : Pat<(int_nvvm_sust_b_1d_i64_clamp 6063 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6064 (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6065 6066def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp 6067 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6068 (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6069 Int16Regs:$r, Int16Regs:$g)>; 6070 6071def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp 6072 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6073 (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6074 
Int16Regs:$r, Int16Regs:$g)>; 6075 6076def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp 6077 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6078 (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6079 Int32Regs:$r, Int32Regs:$g)>; 6080 6081def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp 6082 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6083 (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, 6084 Int64Regs:$r, Int64Regs:$g)>; 6085 6086def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp 6087 Int64Regs:$s, Int32Regs:$x, 6088 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6089 (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, 6090 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6091 6092def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp 6093 Int64Regs:$s, Int32Regs:$x, 6094 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6095 (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, 6096 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6097 6098def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp 6099 Int64Regs:$s, Int32Regs:$x, 6100 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6101 (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, 6102 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6103 6104 6105 6106def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp 6107 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6108 (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6109 Int16Regs:$r)>; 6110 6111def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp 6112 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6113 (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6114 Int16Regs:$r)>; 6115 6116def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp 6117 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6118 (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6119 Int32Regs:$r)>; 6120 6121def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp 6122 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6123 
(SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6124 Int64Regs:$r)>; 6125 6126def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp 6127 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6128 (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6129 Int16Regs:$r, Int16Regs:$g)>; 6130 6131def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp 6132 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6133 (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6134 Int16Regs:$r, Int16Regs:$g)>; 6135 6136def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp 6137 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6138 (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6139 Int32Regs:$r, Int32Regs:$g)>; 6140 6141def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp 6142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6143 (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6144 Int64Regs:$r, Int64Regs:$g)>; 6145 6146def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp 6147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6148 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6149 (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6150 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6151 6152def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp 6153 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6154 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6155 (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6156 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6157 6158def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp 6159 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6160 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6161 (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6162 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6163 6164 6165 6166def 
: Pat<(int_nvvm_sust_b_2d_i8_clamp 6167 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6168 (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6169 Int16Regs:$r)>; 6170 6171def : Pat<(int_nvvm_sust_b_2d_i16_clamp 6172 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6173 (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6174 Int16Regs:$r)>; 6175 6176def : Pat<(int_nvvm_sust_b_2d_i32_clamp 6177 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6178 (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6179 Int32Regs:$r)>; 6180 6181def : Pat<(int_nvvm_sust_b_2d_i64_clamp 6182 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6183 (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6184 Int64Regs:$r)>; 6185 6186def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp 6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6188 (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6189 Int16Regs:$r, Int16Regs:$g)>; 6190 6191def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp 6192 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6193 (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6194 Int16Regs:$r, Int16Regs:$g)>; 6195 6196def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp 6197 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6198 (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6199 Int32Regs:$r, Int32Regs:$g)>; 6200 6201def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp 6202 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6203 (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6204 Int64Regs:$r, Int64Regs:$g)>; 6205 6206def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp 6207 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6208 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6209 (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6210 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, 
Int16Regs:$a)>; 6211 6212def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp 6213 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6214 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6215 (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6216 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6217 6218def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp 6219 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6220 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6221 (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6222 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6223 6224 6225 6226def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp 6227 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6228 (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s, 6229 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6230 Int16Regs:$r)>; 6231 6232def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp 6233 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6234 (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s, 6235 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6236 Int16Regs:$r)>; 6237 6238def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp 6239 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6240 (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s, 6241 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6242 Int32Regs:$r)>; 6243 6244def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp 6245 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6246 (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s, 6247 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6248 Int64Regs:$r)>; 6249 6250def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp 6251 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6252 Int16Regs:$r, Int16Regs:$g), 6253 (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, 6254 Int32Regs:$x, Int32Regs:$y, 6255 Int16Regs:$r, Int16Regs:$g)>; 6256 6257def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp 6258 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6259 
Int16Regs:$r, Int16Regs:$g), 6260 (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, 6261 Int32Regs:$x, Int32Regs:$y, 6262 Int16Regs:$r, Int16Regs:$g)>; 6263 6264def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp 6265 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6266 Int32Regs:$g), 6267 (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6268 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6269 6270def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp 6271 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6272 Int64Regs:$g), 6273 (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, 6274 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6275 6276def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp 6277 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6278 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6279 (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s, 6280 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6281 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6282 6283def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp 6284 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6285 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6286 (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s, 6287 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6288 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6289 6290def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp 6291 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6292 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6293 (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, 6294 Int32Regs:$x, Int32Regs:$y, 6295 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6296 6297 6298 6299def : Pat<(int_nvvm_sust_b_3d_i8_clamp 6300 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6301 Int16Regs:$r), 6302 (SUST_B_3D_B8_CLAMP Int64Regs:$s, 6303 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6304 Int16Regs:$r)>; 6305 6306def : 
Pat<(int_nvvm_sust_b_3d_i16_clamp 6307 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6308 Int16Regs:$r), 6309 (SUST_B_3D_B16_CLAMP Int64Regs:$s, 6310 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6311 Int16Regs:$r)>; 6312 6313def : Pat<(int_nvvm_sust_b_3d_i32_clamp 6314 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6315 Int32Regs:$r), 6316 (SUST_B_3D_B32_CLAMP Int64Regs:$s, 6317 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6318 Int32Regs:$r)>; 6319 6320def : Pat<(int_nvvm_sust_b_3d_i64_clamp 6321 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6322 Int64Regs:$r), 6323 (SUST_B_3D_B64_CLAMP Int64Regs:$s, 6324 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6325 Int64Regs:$r)>; 6326 6327def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp 6328 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6329 Int16Regs:$r, Int16Regs:$g), 6330 (SUST_B_3D_V2B8_CLAMP Int64Regs:$s, 6331 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6332 Int16Regs:$r, Int16Regs:$g)>; 6333 6334def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp 6335 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6336 Int16Regs:$r, Int16Regs:$g), 6337 (SUST_B_3D_V2B16_CLAMP Int64Regs:$s, 6338 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6339 Int16Regs:$r, Int16Regs:$g)>; 6340 6341def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 6342 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6343 Int32Regs:$r, Int32Regs:$g), 6344 (SUST_B_3D_V2B32_CLAMP Int64Regs:$s, 6345 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6346 Int32Regs:$r, Int32Regs:$g)>; 6347 6348def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 6349 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6350 Int64Regs:$r, Int64Regs:$g), 6351 (SUST_B_3D_V2B64_CLAMP Int64Regs:$s, 6352 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6353 Int64Regs:$r, Int64Regs:$g)>; 6354 6355def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 6356 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6357 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6358 (SUST_B_3D_V4B8_CLAMP Int64Regs:$s, 6359 
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6360 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6361 6362def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 6363 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6364 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6365 (SUST_B_3D_V4B16_CLAMP Int64Regs:$s, 6366 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6367 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6368 6369def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 6370 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6371 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6372 (SUST_B_3D_V4B32_CLAMP Int64Regs:$s, 6373 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6374 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6375 6376 6377// .trap variant 6378def : Pat<(int_nvvm_sust_b_1d_i8_trap 6379 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6380 (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6381 6382def : Pat<(int_nvvm_sust_b_1d_i16_trap 6383 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6384 (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6385 6386def : Pat<(int_nvvm_sust_b_1d_i32_trap 6387 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6388 (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6389 6390def : Pat<(int_nvvm_sust_b_1d_i64_trap 6391 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6392 (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6393 6394def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 6395 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6396 (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, 6397 Int16Regs:$r, Int16Regs:$g)>; 6398 6399def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 6400 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6401 (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, 6402 Int16Regs:$r, Int16Regs:$g)>; 6403 6404def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 6405 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6406 (SUST_B_1D_V2B32_TRAP Int64Regs:$s, 
Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

//-----------------------------------
// Surface store (sust) selection patterns
//-----------------------------------
//
// Every sust pattern below forwards the intrinsic's operands, unchanged and in
// the same order, to the machine instruction of the matching name. The helper
// classes capture the operand shapes so that each intrinsic needs only one
// instantiation naming:
//   intr - the int_nvvm_sust_* intrinsic to match,
//   inst - the SUST_* instruction to select,
//   rc   - the register class of the stored value operand(s).
//
// Operand names are the ones used throughout this file's sust patterns:
//   $s             - Int64Regs operand (presumably the surface handle; confirm
//                    against the SUST_* instruction definitions),
//   $l             - Int32Regs operand present only in the *_ARRAY forms
//                    (presumably the array layer index),
//   $x, $y, $z     - Int32Regs coordinates,
//   $r, $g, $b, $a - the one, two or four stored values.
//
// The class suffix _V1/_V2/_V4 is the number of value operands.
// Note that the i8 variants use Int16Regs, exactly as the i16 variants do.

// 1D: (s, x, values...)
class SUST_PAT_1D_V1<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r)>;
class SUST_PAT_1D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g)>;
class SUST_PAT_1D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, rc:$r, rc:$g, rc:$b, rc:$a)>;

// 1D array: (s, l, x, values...)
class SUST_PAT_1D_ARRAY_V1<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r)>;
class SUST_PAT_1D_ARRAY_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, rc:$r, rc:$g)>;
class SUST_PAT_1D_ARRAY_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// 2D: (s, x, y, values...)
class SUST_PAT_2D_V1<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r)>;
class SUST_PAT_2D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, rc:$r, rc:$g)>;
class SUST_PAT_2D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// 2D array: (s, l, x, y, values...)
class SUST_PAT_2D_ARRAY_V1<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r)>;
class SUST_PAT_2D_ARRAY_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g)>;
class SUST_PAT_2D_ARRAY_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
         rc:$r, rc:$g, rc:$b, rc:$a)>;

// 3D: (s, x, y, z, values...)
class SUST_PAT_3D_V1<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r)>;
class SUST_PAT_3D_V2<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g)>;
class SUST_PAT_3D_V4<Intrinsic intr, Instruction inst, RegisterClass rc>
  : Pat<(intr Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g, rc:$b, rc:$a),
        (inst Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
         rc:$r, rc:$g, rc:$b, rc:$a)>;


// int_nvvm_sust_b_*_trap (continued from the patterns above)

def : SUST_PAT_1D_V2<int_nvvm_sust_b_1d_v2i64_trap,
                     SUST_B_1D_V2B64_TRAP, Int64Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i8_trap,
                     SUST_B_1D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i16_trap,
                     SUST_B_1D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i32_trap,
                     SUST_B_1D_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i8_trap,
                           SUST_B_1D_ARRAY_B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i16_trap,
                           SUST_B_1D_ARRAY_B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i32_trap,
                           SUST_B_1D_ARRAY_B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i64_trap,
                           SUST_B_1D_ARRAY_B64_TRAP, Int64Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i8_trap,
                           SUST_B_1D_ARRAY_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i16_trap,
                           SUST_B_1D_ARRAY_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i32_trap,
                           SUST_B_1D_ARRAY_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i64_trap,
                           SUST_B_1D_ARRAY_V2B64_TRAP, Int64Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i8_trap,
                           SUST_B_1D_ARRAY_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i16_trap,
                           SUST_B_1D_ARRAY_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i32_trap,
                           SUST_B_1D_ARRAY_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i8_trap,
                     SUST_B_2D_B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i16_trap,
                     SUST_B_2D_B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i32_trap,
                     SUST_B_2D_B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i64_trap,
                     SUST_B_2D_B64_TRAP, Int64Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i8_trap,
                     SUST_B_2D_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i16_trap,
                     SUST_B_2D_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i32_trap,
                     SUST_B_2D_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i64_trap,
                     SUST_B_2D_V2B64_TRAP, Int64Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i8_trap,
                     SUST_B_2D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i16_trap,
                     SUST_B_2D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i32_trap,
                     SUST_B_2D_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i8_trap,
                           SUST_B_2D_ARRAY_B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i16_trap,
                           SUST_B_2D_ARRAY_B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i32_trap,
                           SUST_B_2D_ARRAY_B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i64_trap,
                           SUST_B_2D_ARRAY_B64_TRAP, Int64Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i8_trap,
                           SUST_B_2D_ARRAY_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i16_trap,
                           SUST_B_2D_ARRAY_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i32_trap,
                           SUST_B_2D_ARRAY_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i64_trap,
                           SUST_B_2D_ARRAY_V2B64_TRAP, Int64Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i8_trap,
                           SUST_B_2D_ARRAY_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i16_trap,
                           SUST_B_2D_ARRAY_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i32_trap,
                           SUST_B_2D_ARRAY_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i8_trap,
                     SUST_B_3D_B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i16_trap,
                     SUST_B_3D_B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i32_trap,
                     SUST_B_3D_B32_TRAP, Int32Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i64_trap,
                     SUST_B_3D_B64_TRAP, Int64Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i8_trap,
                     SUST_B_3D_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i16_trap,
                     SUST_B_3D_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i32_trap,
                     SUST_B_3D_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i64_trap,
                     SUST_B_3D_V2B64_TRAP, Int64Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i8_trap,
                     SUST_B_3D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i16_trap,
                     SUST_B_3D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i32_trap,
                     SUST_B_3D_V4B32_TRAP, Int32Regs>;


// .zero variant

def : SUST_PAT_1D_V1<int_nvvm_sust_b_1d_i8_zero,
                     SUST_B_1D_B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_V1<int_nvvm_sust_b_1d_i16_zero,
                     SUST_B_1D_B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_V1<int_nvvm_sust_b_1d_i32_zero,
                     SUST_B_1D_B32_ZERO, Int32Regs>;
def : SUST_PAT_1D_V1<int_nvvm_sust_b_1d_i64_zero,
                     SUST_B_1D_B64_ZERO, Int64Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_b_1d_v2i8_zero,
                     SUST_B_1D_V2B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_b_1d_v2i16_zero,
                     SUST_B_1D_V2B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_b_1d_v2i32_zero,
                     SUST_B_1D_V2B32_ZERO, Int32Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_b_1d_v2i64_zero,
                     SUST_B_1D_V2B64_ZERO, Int64Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i8_zero,
                     SUST_B_1D_V4B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i16_zero,
                     SUST_B_1D_V4B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_b_1d_v4i32_zero,
                     SUST_B_1D_V4B32_ZERO, Int32Regs>;

def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i8_zero,
                           SUST_B_1D_ARRAY_B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i16_zero,
                           SUST_B_1D_ARRAY_B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i32_zero,
                           SUST_B_1D_ARRAY_B32_ZERO, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_b_1d_array_i64_zero,
                           SUST_B_1D_ARRAY_B64_ZERO, Int64Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i8_zero,
                           SUST_B_1D_ARRAY_V2B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i16_zero,
                           SUST_B_1D_ARRAY_V2B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i32_zero,
                           SUST_B_1D_ARRAY_V2B32_ZERO, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_b_1d_array_v2i64_zero,
                           SUST_B_1D_ARRAY_V2B64_ZERO, Int64Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i8_zero,
                           SUST_B_1D_ARRAY_V4B8_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i16_zero,
                           SUST_B_1D_ARRAY_V4B16_ZERO, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_b_1d_array_v4i32_zero,
                           SUST_B_1D_ARRAY_V4B32_ZERO, Int32Regs>;

def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i8_zero,
                     SUST_B_2D_B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i16_zero,
                     SUST_B_2D_B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i32_zero,
                     SUST_B_2D_B32_ZERO, Int32Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_b_2d_i64_zero,
                     SUST_B_2D_B64_ZERO, Int64Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i8_zero,
                     SUST_B_2D_V2B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i16_zero,
                     SUST_B_2D_V2B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i32_zero,
                     SUST_B_2D_V2B32_ZERO, Int32Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_b_2d_v2i64_zero,
                     SUST_B_2D_V2B64_ZERO, Int64Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i8_zero,
                     SUST_B_2D_V4B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i16_zero,
                     SUST_B_2D_V4B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_b_2d_v4i32_zero,
                     SUST_B_2D_V4B32_ZERO, Int32Regs>;

def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i8_zero,
                           SUST_B_2D_ARRAY_B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i16_zero,
                           SUST_B_2D_ARRAY_B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i32_zero,
                           SUST_B_2D_ARRAY_B32_ZERO, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_b_2d_array_i64_zero,
                           SUST_B_2D_ARRAY_B64_ZERO, Int64Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i8_zero,
                           SUST_B_2D_ARRAY_V2B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i16_zero,
                           SUST_B_2D_ARRAY_V2B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i32_zero,
                           SUST_B_2D_ARRAY_V2B32_ZERO, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_b_2d_array_v2i64_zero,
                           SUST_B_2D_ARRAY_V2B64_ZERO, Int64Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i8_zero,
                           SUST_B_2D_ARRAY_V4B8_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i16_zero,
                           SUST_B_2D_ARRAY_V4B16_ZERO, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_b_2d_array_v4i32_zero,
                           SUST_B_2D_ARRAY_V4B32_ZERO, Int32Regs>;

def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i8_zero,
                     SUST_B_3D_B8_ZERO, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i16_zero,
                     SUST_B_3D_B16_ZERO, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i32_zero,
                     SUST_B_3D_B32_ZERO, Int32Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_b_3d_i64_zero,
                     SUST_B_3D_B64_ZERO, Int64Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i8_zero,
                     SUST_B_3D_V2B8_ZERO, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i16_zero,
                     SUST_B_3D_V2B16_ZERO, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i32_zero,
                     SUST_B_3D_V2B32_ZERO, Int32Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_b_3d_v2i64_zero,
                     SUST_B_3D_V2B64_ZERO, Int64Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i8_zero,
                     SUST_B_3D_V4B8_ZERO, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i16_zero,
                     SUST_B_3D_V4B16_ZERO, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_b_3d_v4i32_zero,
                     SUST_B_3D_V4B32_ZERO, Int32Regs>;


// int_nvvm_sust_p_*_trap
// (no i64 / v2i64 forms are matched for the sust_p intrinsics)

def : SUST_PAT_1D_V1<int_nvvm_sust_p_1d_i8_trap,
                     SUST_P_1D_B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_V1<int_nvvm_sust_p_1d_i16_trap,
                     SUST_P_1D_B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_V1<int_nvvm_sust_p_1d_i32_trap,
                     SUST_P_1D_B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_p_1d_v2i8_trap,
                     SUST_P_1D_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_p_1d_v2i16_trap,
                     SUST_P_1D_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_V2<int_nvvm_sust_p_1d_v2i32_trap,
                     SUST_P_1D_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_p_1d_v4i8_trap,
                     SUST_P_1D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_p_1d_v4i16_trap,
                     SUST_P_1D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_V4<int_nvvm_sust_p_1d_v4i32_trap,
                     SUST_P_1D_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_p_1d_array_i8_trap,
                           SUST_P_1D_ARRAY_B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_p_1d_array_i16_trap,
                           SUST_P_1D_ARRAY_B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V1<int_nvvm_sust_p_1d_array_i32_trap,
                           SUST_P_1D_ARRAY_B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i8_trap,
                           SUST_P_1D_ARRAY_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i16_trap,
                           SUST_P_1D_ARRAY_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V2<int_nvvm_sust_p_1d_array_v2i32_trap,
                           SUST_P_1D_ARRAY_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i8_trap,
                           SUST_P_1D_ARRAY_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i16_trap,
                           SUST_P_1D_ARRAY_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_1D_ARRAY_V4<int_nvvm_sust_p_1d_array_v4i32_trap,
                           SUST_P_1D_ARRAY_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_2D_V1<int_nvvm_sust_p_2d_i8_trap,
                     SUST_P_2D_B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_p_2d_i16_trap,
                     SUST_P_2D_B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V1<int_nvvm_sust_p_2d_i32_trap,
                     SUST_P_2D_B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_p_2d_v2i8_trap,
                     SUST_P_2D_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_p_2d_v2i16_trap,
                     SUST_P_2D_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V2<int_nvvm_sust_p_2d_v2i32_trap,
                     SUST_P_2D_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_p_2d_v4i8_trap,
                     SUST_P_2D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_p_2d_v4i16_trap,
                     SUST_P_2D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_V4<int_nvvm_sust_p_2d_v4i32_trap,
                     SUST_P_2D_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_p_2d_array_i8_trap,
                           SUST_P_2D_ARRAY_B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_p_2d_array_i16_trap,
                           SUST_P_2D_ARRAY_B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V1<int_nvvm_sust_p_2d_array_i32_trap,
                           SUST_P_2D_ARRAY_B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_p_2d_array_v2i8_trap,
                           SUST_P_2D_ARRAY_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_p_2d_array_v2i16_trap,
                           SUST_P_2D_ARRAY_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V2<int_nvvm_sust_p_2d_array_v2i32_trap,
                           SUST_P_2D_ARRAY_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_p_2d_array_v4i8_trap,
                           SUST_P_2D_ARRAY_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_p_2d_array_v4i16_trap,
                           SUST_P_2D_ARRAY_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_2D_ARRAY_V4<int_nvvm_sust_p_2d_array_v4i32_trap,
                           SUST_P_2D_ARRAY_V4B32_TRAP, Int32Regs>;

def : SUST_PAT_3D_V1<int_nvvm_sust_p_3d_i8_trap,
                     SUST_P_3D_B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_p_3d_i16_trap,
                     SUST_P_3D_B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V1<int_nvvm_sust_p_3d_i32_trap,
                     SUST_P_3D_B32_TRAP, Int32Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_p_3d_v2i8_trap,
                     SUST_P_3D_V2B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_p_3d_v2i16_trap,
                     SUST_P_3D_V2B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V2<int_nvvm_sust_p_3d_v2i32_trap,
                     SUST_P_3D_V2B32_TRAP, Int32Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_p_3d_v4i8_trap,
                     SUST_P_3D_V4B8_TRAP, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_p_3d_v4i16_trap,
                     SUST_P_3D_V4B16_TRAP, Int16Regs>;
def : SUST_PAT_3D_V4<int_nvvm_sust_p_3d_v4i32_trap,
                     SUST_P_3D_V4B32_TRAP, Int32Regs>;

//-----------------------------------
// Read Special Registers
//-----------------------------------

// Instruction that copies the special register "%<regname>" into a 64-bit
// destination register with "mov.u64". It is selected for the zero-operand
// intrinsic `intop`. The '%' prefix is added here, so `regname` must be given
// without it.
class PTX_READ_SREG_R64<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int64Regs:$d), (ins),
              !strconcat("mov.u64 \t$d, %", regname, ";"),
              [(set Int64Regs:$d, (intop))]>;

// 32-bit counterpart of PTX_READ_SREG_R64: emits "mov.u32" into an Int32Regs
// destination.
class PTX_READ_SREG_R32<string regname, Intrinsic intop>
  : NVPTXInst<(outs Int32Regs:$d), (ins),
              !strconcat("mov.u32 \t$d, %", regname, ";"),
              [(set Int32Regs:$d, (intop))]>;

// TODO: Add vector versions of the special-register reads.

def INT_PTX_SREG_TID_X :
    PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
def INT_PTX_SREG_TID_Y :
    PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
def INT_PTX_SREG_TID_Z :
    PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
def INT_PTX_SREG_TID_W :
    PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;

def INT_PTX_SREG_NTID_X :
    PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
def INT_PTX_SREG_NTID_Y :
    PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
def INT_PTX_SREG_NTID_Z :
    PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
def INT_PTX_SREG_NTID_W :
    PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;

def INT_PTX_SREG_LANEID :
    PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
def INT_PTX_SREG_WARPID :
    PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
    PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;

def INT_PTX_SREG_CTAID_X :
    PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
def INT_PTX_SREG_CTAID_Y :
    PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
def INT_PTX_SREG_CTAID_Z :
    PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
def INT_PTX_SREG_CTAID_W :
    PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;

def INT_PTX_SREG_NCTAID_X :
    PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
def INT_PTX_SREG_NCTAID_Y :
    PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
def INT_PTX_SREG_NCTAID_Z :
    PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
def INT_PTX_SREG_NCTAID_W :
    PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;

def INT_PTX_SREG_SMID :
    PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
    PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>;
def INT_PTX_SREG_GRIDID :
    PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>;

def INT_PTX_SREG_LANEMASK_EQ :
    PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>;
def INT_PTX_SREG_LANEMASK_LE :
    PTX_READ_SREG_R32<"lanemask_le", int_nvvm_read_ptx_sreg_lanemask_le>;
def INT_PTX_SREG_LANEMASK_LT :
    PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>;
def INT_PTX_SREG_LANEMASK_GE :
    PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>;
def INT_PTX_SREG_LANEMASK_GT :
    PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;

def INT_PTX_SREG_CLOCK :
    PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
// clock64 is the only special register here that is read as 64 bits.
def INT_PTX_SREG_CLOCK64 :
    PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;

def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>;
def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>;

// TODO: It would be nice to use PTX_READ_SREG_R32 here, but that class always
// prepends '%' to the register name, whereas WARP_SZ is emitted as a bare
// constant.
def INT_PTX_SREG_WARPSIZE :
    NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
              [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;

// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// In addition to target-independent fields provided by WMMA_REGS, it adds
// the fields commonly used to implement specific PTX instructions -- register
// types and names, constraints, parts of assembly, etc.
class WMMA_REGINFO<WMMA_REGS r>
    : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
  // NVPTX register class used to carry the fragment data: f16 and f32 have
  // their own classes, every integer/bit element type uses Int32Regs.
  NVPTXRegClass regclass = !cond(
      !eq(ptx_elt_type, "f16") : Float16x2Regs,
      !eq(ptx_elt_type, "f32") : Float32Regs,
      !or(!eq(ptx_elt_type, "s32"),
          !eq(ptx_elt_type, "s8"),
          !eq(ptx_elt_type, "u8"),
          !eq(ptx_elt_type, "s4"),
          !eq(ptx_elt_type, "u4"),
          !eq(ptx_elt_type, "b1")) : Int32Regs);

  // One `regclass` entry per element of the inherited `regs` list; these are
  // the instruction input/output argument types for the fragment.
  list<NVPTXRegClass> ptx_regs = !foreach(each_reg, regs, regclass);

  // Register names for the fragment -- ["ra0", "ra1", ...] for frag "a".
  list<string> reg_names = RegSeq<!size(ptx_regs), "r" # frag>.ret;

  // "{{$r0, $r1, ..., $rN-1}}" for use in asm string construction.
  string regstring = "{{$" # !head(reg_names)
                     # !foldl("", !tail(reg_names), acc, reg,
                              acc # ", $" # reg)
                     # "}}";

  // Predicates for this particular fragment variant. Technically those are
  // per-instruction predicates, but currently all fragments that can be used
  // in a given instruction are subject to the same constraints, so an
  // instruction can use the predicates of any of its fragments. If/when this
  // is no longer the case, all per-fragment predicates can be concatenated to
  // enforce that every fragment of the instruction is viable.
  list<Predicate> Predicates = !cond(
      // fp16 -> fp16/fp32 @ m16n16k16
      !and(!eq(geom, "m16n16k16"),
           !or(!eq(ptx_elt_type, "f16"),
               !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],

      // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
      !and(!or(!eq(geom, "m8n32k16"),
               !eq(geom, "m32n8k16")),
           !or(!eq(ptx_elt_type, "f16"),
               !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],

      // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
      !and(!or(!eq(geom, "m16n16k16"),
               !eq(geom, "m8n32k16"),
               !eq(geom, "m32n8k16")),
           !or(!eq(ptx_elt_type, "u8"),
               !eq(ptx_elt_type, "s8"),
               !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],

      // u4/s4/b1 -> s32 @ m8n8k32 (u4/s4), m8n8k128 (b1)
      !or(!eq(geom, "m8n8k128"),
          !eq(geom, "m8n8k32")) : [hasSM75, hasPTX63]);

  // Template dags for the instruction's outputs/inputs: the same typed and
  // named operands, under the `outs` and `ins` operators respectively.
  dag Outs = !dag(outs, ptx_regs, reg_names);
  dag Ins = !dag(ins, ptx_regs, reg_names);
}

// Converts an (ins ...) operand dag into a dag that matches a call of the
// given intrinsic: `ins` becomes the intrinsic, and the memory operand kinds
// MEMri / MEMri64 / imem become ADDRri / ADDRri64 / ADDRvar.
class BuildPatternI<Intrinsic Intr, dag Ins> {
  dag ret = !foreach(item, Ins,
                     !subst(imem, ADDRvar,
                            !subst(MEMri64, ADDRri64,
                                   !subst(MEMri, ADDRri,
                                          !subst(ins, Intr, item)))));
}

// Same as BuildPatternI, but the dag operator is a PatFrag instead of an
// Intrinsic.
class BuildPatternPF<PatFrag Intr, dag Ins> {
  dag ret = !foreach(item, Ins,
                     !subst(imem, ADDRvar,
                            !subst(MEMri64, ADDRri64,
                                   !subst(MEMri, ADDRri,
                                          !subst(ins, Intr, item)))));
}

// Common WMMA-related fields used for building patterns for all MMA
// instructions. Operand lists and the asm string start out as placeholders
// and are overridden by the derived classes.
class WMMA_INSTR<string _Intr, list<dag> _Args>
    : NVPTXInst<(outs), (ins), "?", []> {
  // Intrinsic record looked up by name.
  Intrinsic Intr = !cast<Intrinsic>(_Intr);
  // All argument dags concatenated into a single (ins ...) dag.
  dag Args = !foldl((ins), _Args, acc, piece, !con(acc, piece));
  // Pre-built pattern matching (intrinsic arg0, arg1, ...).
  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
}

//
// wmma.load.<frag>.sync${ptx:aligned}.<layout>.<geom><space>.<elt_type>
//   <space> is "", ".global" or ".shared"; an $ldm operand is appended when
//   WithStride is set.
//
class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
                DAGOperand SrcOp>
    : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
                 [!con((ins SrcOp:$src),
                       !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
      Requires<Frag.Predicates> {
  // Load/store intrinsics are overloaded on the pointer's address space.
  // To match the right intrinsic, we need to build an AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but written as
  // (ops node:$name, ...).
  dag PFOperands = !if(WithStride,
                       (ops node:$src, node:$ldm),
                       (ops node:$src));
  // PatFrag that only matches the address space selected by Space; anything
  // other than ".shared"/".global" falls back to the generic predicate.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(item, PFOperands,
                                      !subst(ops, Intr, item)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   1: AS_match.generic)>;
  // Replace the default pattern with the AS-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // The trailing MmaCode:$ptx operand exists only for the ${ptx:aligned}
  // modifier in the asm string.
  // NOTE(review): what that modifier prints is decided by the MmaCode operand
  // printer, which is not visible here -- confirm.
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = !strconcat("wmma.load.",
                             Frag.frag,
                             ".sync",
                             "${ptx:aligned}",
                             ".", Layout,
                             ".", Frag.geom,
                             Space,
                             ".", Frag.ptx_elt_type, " \t",
                             Frag.regstring,
                             ", [$src]",
                             !if(WithStride, ", $ldm", ""),
                             ";");
}

//
// wmma.store.d.sync${ptx:aligned}.<layout>.<geom><space>.<elt_type>
//   <space> is "", ".global" or ".shared"; an $ldm operand is appended when
//   WithStride is set.
//
class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
                   bit WithStride, DAGOperand DstOp>
    : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
                 [!con((ins DstOp:$dst),
                       Frag.Ins,
                       !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
      Requires<Frag.Predicates> {

  // Load/store intrinsics are overloaded on the pointer's address space.
  // To match the right intrinsic, we need to build an AS-constrained PatFrag.
  // PFOperands is a dag equivalent in shape to Args, but written as
  // (ops node:$name, ...): the destination, one node per fragment register,
  // and the optional stride.
  dag PFOperands = !con((ops node:$dst),
                        !dag(ops,
                             !foreach(each_reg, Frag.regs, node),
                             Frag.reg_names),
                        !if(WithStride, (ops node:$ldm), (ops)));
  // PatFrag that only matches the address space selected by Space; anything
  // other than ".shared"/".global" falls back to the generic predicate.
  PatFrag IntrFrag = PatFrag<PFOperands,
                             !foreach(item, PFOperands,
                                      !subst(ops, Intr, item)),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   !eq(Space, ".global"): AS_match.global,
                                   1: AS_match.generic)>;
  // Replace the default pattern with the AS-constrained one.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  // A store defines no registers; MmaCode:$ptx is appended for the
  // ${ptx:aligned} modifier in the asm string.
  let OutOperandList = (outs);
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let AsmString = !strconcat("wmma.store.d.sync",
                             "${ptx:aligned}",
                             ".", Layout,
                             ".", Frag.geom,
                             Space,
                             ".", Frag.ptx_elt_type,
                             " \t[$dst],",
                             Frag.regstring,
                             !if(WithStride, ", $ldm", ""),
                             ";");
}

// Create all load/store variants: every combination of layout, stride,
// address space and address operand kind, for each load/store fragment that
// NVVM_MMA_SUPPORTED yields a non-empty list for. All anonymous records are
// collected into MMA_LDSTs.
defset list<WMMA_INSTR> MMA_LDSTs = {
  foreach layout = ["row", "col"] in {
    foreach stride = [0, 1] in {
      foreach space = [".global", ".shared", ""] in {
        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
          foreach frag = NVVM_MMA_OPS.all_ld_ops in {
            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in {
              def : WMMA_LOAD<WMMA_REGINFO<frag>, layout, space, stride,
                              addr>;
            }
          } // frag (loads)
          foreach frag = NVVM_MMA_OPS.all_st_ops in {
            foreach _ = NVVM_MMA_SUPPORTED<[frag], layout>.ret in {
              def : WMMA_STORE_D<WMMA_REGINFO<frag>, layout, space, stride,
                                 addr>;
            }
          } // frag (stores)
        } // addr
      } // space
    } // stride
  } // layout
} // defset

// WMMA.MMA
// Inputs are the A, B and C fragments, the result is the D fragment.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite>
    : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, Satfinite,
                               FragA, FragB, FragC, FragD>.record,
                 [FragA.Ins, FragB.Ins, FragC.Ins]>,
      // Requires does not seem to have effect on Instruction w/o Patterns.
      // We set it here anyways and propagate to the Pat<> we construct below.
      Requires<FragA.Predicates> {
  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;
  // Type suffix of the mnemonic: ".s32.<A type>.<B type>.s32" when D is s32,
  // otherwise ".<D type>.<C type>".
  string TypeList = !if(!eq(FragD.ptx_elt_type, "s32"),
                        ".s32"
                          # "." # FragA.ptx_elt_type
                          # "." # FragB.ptx_elt_type
                          # ".s32",
                        "." # FragD.ptx_elt_type # "." # FragC.ptx_elt_type);
  // b1 inputs additionally get the ".xor.popc" qualifier.
  let AsmString = !strconcat("wmma.mma",
                             !if(!eq(FragA.ptx_elt_type, "b1"),
                                 ".xor.popc", ""),
                             ".sync",
                             "${ptx:aligned}",
                             ".", ALayout,
                             ".", BLayout,
                             ".", FragA.geom,
                             TypeList,
                             !if(Satfinite, ".satfinite", ""), "\n\t\t",
                             FragD.regstring, ",\n\t\t",
                             FragA.regstring, ",\n\t\t",
                             FragB.regstring, ",\n\t\t",
                             FragC.regstring, ";");
}

// Create all MMA variants accepted by NVVM_MMA_SUPPORTED; `op` is indexed as
// [A, B, C, D]. All anonymous records are collected into MMAs.
defset list<WMMA_INSTR> MMAs = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach _ = NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret in {
            def : WMMA_MMA<WMMA_REGINFO<op[0]>,
                           WMMA_REGINFO<op[1]>,
                           WMMA_REGINFO<op[2]>,
                           WMMA_REGINFO<op[3]>,
                           layout_a, layout_b, satf>;
          }
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset


// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
class WMMA_PAT<WMMA_INSTR wi>
    : Pat<wi.IntrinsicPattern,
          !con(!foreach(item, wi.Args, !subst(ins, wi, item)),
               (wi ptx.version))>,
      Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, MMA_LDSTs) in {
  def : WMMA_PAT<mma>;
}