//===-- SIInstrInfo.td -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

def isWave32 : Predicate<"Subtarget->getWavefrontSize() == 32">,
  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
def isWave64 : Predicate<"Subtarget->getWavefrontSize() == 64">,
  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;

class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
  : MnemonicAlias<From, To, VariantName>, PredicateControl;

// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in SIInstrInfo.cpp and the columns of the
// getMCOpcodeGen table.
def SIEncodingFamily {
  int NONE = -1;
  int SI = 0;
  int VI = 1;
  int SDWA = 2;
  int SDWA9 = 3;
  int GFX80 = 4;
  int GFX9 = 5;
  int GFX10 = 6;
  int SDWA10 = 7;
  int GFX90A = 8;
  int GFX940 = 9;
  int GFX11 = 10;
  int GFX12 = 11;
}

//===----------------------------------------------------------------------===//
// Subtarget info
//===----------------------------------------------------------------------===//

class GFXGen<Predicate pred, string dn, string suffix, int sub> {
  Predicate AssemblerPredicate = pred;
  string DecoderNamespace = dn;
  string Suffix = suffix;
  int Subtarget = sub;
}

def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
def GFX11Gen : GFXGen<isGFX11Only, "GFX11", "_gfx11", SIEncodingFamily.GFX11>;
def GFX10Gen : GFXGen<isGFX10Only, "GFX10", "_gfx10", SIEncodingFamily.GFX10>;

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SDTSBufferLoad : SDTypeProfile<1, 3,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // offset(imm)
   SDTCisVT<3, i32>]>;  // cachepolicy

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD", SDTSBufferLoad,
                            [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_byte : SDNode<"AMDGPUISD::SBUFFER_LOAD_BYTE", SDTSBufferLoad,
                                 [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ubyte
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_UBYTE", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_short
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_SHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIsbuffer_load_ushort
    : SDNode<"AMDGPUISD::SBUFFER_LOAD_USHORT", SDTSBufferLoad,
             [SDNPMayLoad, SDNPMemOperand]>;

def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
  SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
  [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;

def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
  SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;

// load_d16_{lo|hi} ptr, tied_input
def SIload_d16 : SDTypeProfile<1, 2, [
  SDTCisPtrTy<1>,
  SDTCisSameAs<0, 2>
]>;


def SDTtbuffer_load : SDTypeProfile<1, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;
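
// Note: in these SDTypeProfiles index 0 is the returned vdata, so the
// numbered constraints above (1..8) describe the input operands, in the
// order a selection pattern sees them, roughly:
//   (rsrc, vindex, voffset, soffset, offset, format, cachepolicy, idxen)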

def SItbuffer_load : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT", SDTtbuffer_load,
                            [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_load_d16 : SDNode<"AMDGPUISD::TBUFFER_LOAD_FORMAT_D16",
                                SDTtbuffer_load,
                                [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]>;

def SDTtbuffer_store : SDTypeProfile<0, 9,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // format(imm)
   SDTCisVT<7, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<8, i1>      // idxen(imm)
  ]>;

def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTtbuffer_store,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SItbuffer_store_d16 : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT_D16",
                                 SDTtbuffer_store,
                                 [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

def SDTBufferLoad : SDTypeProfile<1, 7,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE", SDTBufferLoad,
                                  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE", SDTBufferLoad,
                                 [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short : SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT", SDTBufferLoad,
                                  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_TFE", SDTBufferLoad,
                                [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ubyte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_UBYTE_TFE", SDTBufferLoad,
                                      [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_ushort_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_USHORT_TFE", SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_byte_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_BYTE_TFE", SDTBufferLoad,
                                     [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_short_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_SHORT_TFE", SDTBufferLoad,
                                      [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_tfe : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_TFE", SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
def SIbuffer_load_format_d16 : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT_D16",
                                       SDTBufferLoad,
                                       [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;

def SDTBufferStore : SDTypeProfile<0, 8,
  [                     // vdata
   SDTCisVT<1, v4i32>,  // rsrc
   SDTCisVT<2, i32>,    // vindex(VGPR)
   SDTCisVT<3, i32>,    // voffset(VGPR)
   SDTCisVT<4, i32>,    // soffset(SGPR)
   SDTCisVT<5, i32>,    // offset(imm)
   SDTCisVT<6, i32>,    // cachepolicy, swizzled buffer(imm)
   SDTCisVT<7, i1>]>;   // idxen(imm)

def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
                             [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_byte : SDNode <"AMDGPUISD::BUFFER_STORE_BYTE",
                                  SDTBufferStore,
                                  [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_short : SDNode <"AMDGPUISD::BUFFER_STORE_SHORT",
                                   SDTBufferStore,
                                   [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT",
                                    SDTBufferStore,
                                    [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;
def SIbuffer_store_format_d16 : SDNode <"AMDGPUISD::BUFFER_STORE_FORMAT_D16",
                                        SDTBufferStore,
                                        [SDNPMayStore, SDNPMemOperand, SDNPHasChain]>;

multiclass SDBufferAtomic<string opcode> {
  def "" : SDNode <opcode,
    SDTypeProfile<1, 8,
      [SDTCisVT<2, v4i32>,  // rsrc
       SDTCisVT<3, i32>,    // vindex(VGPR)
       SDTCisVT<4, i32>,    // voffset(VGPR)
       SDTCisVT<5, i32>,    // soffset(SGPR)
       SDTCisVT<6, i32>,    // offset(imm)
       SDTCisVT<7, i32>,    // cachepolicy(imm)
       SDTCisVT<8, i1>]>,   // idxen(imm)
    [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
  >;
  def "_noret" : PatFrag<
    (ops node:$vdata_in, node:$rsrc, node:$vindex, node:$voffset, node:$soffset,
         node:$offset, node:$cachepolicy, node:$idxen),
    (!cast<SDNode>(NAME) node:$vdata_in, node:$rsrc, node:$vindex,
      node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
      node:$idxen)> {
    let HasNoUse = true;
  }
}

defm SIbuffer_atomic_swap : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SWAP">;
defm SIbuffer_atomic_add : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_ADD">;
defm SIbuffer_atomic_sub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SUB">;
defm SIbuffer_atomic_smin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMIN">;
defm SIbuffer_atomic_umin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMIN">;
defm SIbuffer_atomic_smax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_SMAX">;
defm SIbuffer_atomic_umax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_UMAX">;
defm SIbuffer_atomic_and : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_AND">;
defm SIbuffer_atomic_or : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_OR">;
defm SIbuffer_atomic_xor : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_XOR">;
defm SIbuffer_atomic_inc : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_INC">;
defm SIbuffer_atomic_dec : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_DEC">;
defm SIbuffer_atomic_csub : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_CSUB">;
defm SIbuffer_atomic_fadd : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FADD">;
defm SIbuffer_atomic_fmin : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMIN">;
defm SIbuffer_atomic_fmax : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_FMAX">;
defm SIbuffer_atomic_cond_sub_u32 : SDBufferAtomic <"AMDGPUISD::BUFFER_ATOMIC_COND_SUB_U32">;

def SIbuffer_atomic_cmpswap : SDNode <"AMDGPUISD::BUFFER_ATOMIC_CMPSWAP",
  SDTypeProfile<1, 9,
    [SDTCisVT<3, v4i32>,  // rsrc
     SDTCisVT<4, i32>,    // vindex(VGPR)
     SDTCisVT<5, i32>,    // voffset(VGPR)
     SDTCisVT<6, i32>,    // soffset(SGPR)
     SDTCisVT<7, i32>,    // offset(imm)
     SDTCisVT<8, i32>,    // cachepolicy(imm)
     SDTCisVT<9, i1>]>,   // idxen(imm)
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;
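
// Note: like the "_noret" fragments created by SDBufferAtomic above,
// HasNoUse = true in the fragment below restricts matching to nodes whose
// result is unused, which is what allows the no-return form of the atomic
// to be selected.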
def SIbuffer_atomic_cmpswap_noret : PatFrag<
  (ops node:$src, node:$cmp, node:$rsrc, node:$vindex, node:$voffset,
       node:$soffset, node:$offset, node:$cachepolicy, node:$idxen),
  (SIbuffer_atomic_cmpswap node:$src, node:$cmp, node:$rsrc, node:$vindex,
    node:$voffset, node:$soffset, node:$offset, node:$cachepolicy,
    node:$idxen)> {
  let HasNoUse = true;
}

class SDGlobalAtomicNoRtn<string opcode, ValueType ty> : SDNode <opcode,
  SDTypeProfile<0, 2,
    [SDTCisPtrTy<0>,      // vaddr
     SDTCisVT<1, ty>]>,   // vdata
  [SDNPMemOperand, SDNPHasChain, SDNPMayLoad, SDNPMayStore]
>;

def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
  SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;

def SIlds : SDNode<"AMDGPUISD::LDS",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
>;

def SIload_d16_lo : SDNode<"AMDGPUISD::LOAD_D16_LO",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_u8 : SDNode<"AMDGPUISD::LOAD_D16_LO_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_lo_i8 : SDNode<"AMDGPUISD::LOAD_D16_LO_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi : SDNode<"AMDGPUISD::LOAD_D16_HI",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_u8 : SDNode<"AMDGPUISD::LOAD_D16_HI_U8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIload_d16_hi_i8 : SDNode<"AMDGPUISD::LOAD_D16_HI_I8",
  SIload_d16,
  [SDNPMayLoad, SDNPMemOperand, SDNPHasChain]
>;

def SIdenorm_mode : SDNode<"AMDGPUISD::DENORM_MODE",
  SDTypeProfile<0, 1, [SDTCisInt<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def SIfptrunc_round_upward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_UPWARD",
  SDTFPRoundOp
>;

def SIfptrunc_round_downward : SDNode<"AMDGPUISD::FPTRUNC_ROUND_DOWNWARD",
  SDTFPRoundOp
>;

//===----------------------------------------------------------------------===//
// ValueType helpers
//===----------------------------------------------------------------------===//

class isIntType<ValueType SrcVT> {
  bit ret = !and(SrcVT.isInteger, !ne(SrcVT.Value, i1.Value));
}

//===----------------------------------------------------------------------===//
// SDNodes and PatFrags for loads/stores with a glue input.
// These are SDNodes and PatFrags for local loads and stores, used to
// enable s_mov_b32 m0, -1 to be glued to the memory instructions.
//
// These mirror the regular load/store PatFrags and rely on special
// processing during Select() to add the glued copy.
//
//===----------------------------------------------------------------------===//
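
// Note: the nodes below differ from the generic ISD::LOAD/ISD::STORE only in
// the SDNPInGlue flag, so a fragment such as load_local_m0 can be selected
// with the result of the m0 initialization glued to the DS instruction that
// consumes it.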

def AMDGPUld_glue : SDNode <"ISD::LOAD", SDTLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad,
  [SDNPHasChain, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> {
  let IsLoad = 1;
  let IsUnindexed = 1;
}

def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def atomic_load_8_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_glue : PatFrag<(ops node:$ptr),
  (AMDGPUatomic_ld_glue node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

def extload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsAnyExtLoad = 1;
}

def sextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsSignExtLoad = 1;
}

def zextload_glue : PatFrag<(ops node:$ptr), (unindexedload_glue node:$ptr)> {
  let IsLoad = 1;
  let IsZeroExtLoad = 1;
}

def extloadi8_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi8_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_glue : PatFrag<(ops node:$ptr), (extload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi16_glue : PatFrag<(ops node:$ptr), (zextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}


let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> {
  let IsNonExtLoad = 1;
}

def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>;
def sextloadi8_local_m0 : PatFrag<(ops node:$ptr), (sextloadi8_glue node:$ptr)>;
def zextloadi8_local_m0 : PatFrag<(ops node:$ptr), (zextloadi8_glue node:$ptr)>;

def extloadi16_local_m0 : PatFrag<(ops node:$ptr), (extloadi16_glue node:$ptr)>;
def sextloadi16_local_m0 : PatFrag<(ops node:$ptr), (sextloadi16_glue node:$ptr)>;
def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr)>;
} // End IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces

def load_align8_local_m0 : PatFrag<(ops node:$ptr),
                                   (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 8;
}

def load_align16_local_m0 : PatFrag<(ops node:$ptr),
                                    (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  int MinAlignment = 16;
}

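// These aligned fragments exist so that, roughly, a DS pattern such as
//   def : DSReadPat<DS_READ_B64, v2i32, load_align8_local_m0>;  // illustrative, not the literal upstream pattern
// only fires when the load's recorded alignment is at least 8 bytes
// (16 bytes for the b128 variants).
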
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
                                     (atomic_load_8_glue node:$ptr)>;
def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_16_glue node:$ptr)>;
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_32_glue node:$ptr)>;
def atomic_load_64_local_m0 : PatFrag<(ops node:$ptr),
                                      (atomic_load_64_glue node:$ptr)>;
} // End let AddressSpaces = LoadAddress_local.AddrSpaces


def AMDGPUst_glue : SDNode <"ISD::STORE", SDTStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def AMDGPUatomic_st_glue : SDNode <"ISD::ATOMIC_STORE", SDTAtomicStore,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPInGlue]
>;

def unindexedstore_glue : PatFrag<(ops node:$val, node:$ptr),
                                  (AMDGPUst_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsUnindexed = 1;
}

def store_glue : PatFrag<(ops node:$val, node:$ptr),
                         (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def truncstore_glue : PatFrag<(ops node:$val, node:$ptr),
                              (unindexedstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

def truncstorei8_glue : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
  let IsTruncStore = 1;
}

def truncstorei16_glue : PatFrag<(ops node:$val, node:$ptr),
                                 (truncstore_glue node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
  let IsTruncStore = 1;
}

let IsStore = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def store_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                             (store_glue node:$val, node:$ptr)>;
def truncstorei8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                    (truncstorei8_glue node:$val, node:$ptr)>;
def truncstorei16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                     (truncstorei16_glue node:$val, node:$ptr)>;
}

def store_align8_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                     (store_local_m0 node:$value, node:$ptr)>,
                            Aligned<8> {
  let IsStore = 1;
}

def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                      (store_local_m0 node:$value, node:$ptr)>,
                             Aligned<16> {
  let IsStore = 1;
}

let PredicateCode = [{return cast<MemSDNode>(N)->getAlign() < 4;}],
    GISelPredicateCode = [{return (*MI.memoperands_begin())->getAlign() < 4;}],
    AddressSpaces = [ AddrSpaces.Local ] in {
def load_align_less_than_4_local : PatFrag<(ops node:$ptr),
                                           (load_local node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align_less_than_4_local_m0 : PatFrag<(ops node:$ptr),
                                              (load_local_m0 node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align_less_than_4_local : PatFrag <(ops node:$value, node:$ptr),
                                             (store_local node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align_less_than_4_local_m0 : PatFrag <(ops node:$value, node:$ptr),
                                                (store_local_m0 node:$value, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}
}

def atomic_store_8_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_store_16_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_store_32_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_store_64_glue : PatFrag <
  (ops node:$ptr, node:$value),
  (AMDGPUatomic_st_glue node:$ptr, node:$value)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}

let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces in {
def atomic_store_8_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                      (atomic_store_8_glue node:$val, node:$ptr)>;
def atomic_store_16_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_16_glue node:$val, node:$ptr)>;
def atomic_store_32_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_32_glue node:$val, node:$ptr)>;
def atomic_store_64_local_m0 : PatFrag<(ops node:$val, node:$ptr),
                                       (atomic_store_64_glue node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = StoreAddress_local.AddrSpaces


//===----------------------------------------------------------------------===//
// SDNodes PatFrags for a16 loads and stores with 3 components.
// v3f16/v3i16 is widened to v4f16/v4i16, so we need to match on the memory
// load/store size.
//===----------------------------------------------------------------------===//

class mubuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mubuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_load<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsLoad = 1;
  let MemoryVT = vt;
}

class mtbuf_intrinsic_store<SDPatternOperator name, ValueType vt> : PatFrag <
  (ops node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
       node:$format, node:$auxiliary, node:$idxen),
  (name node:$vdata, node:$rsrc, node:$vindex, node:$voffset, node:$soffset, node:$offset,
        node:$format, node:$auxiliary, node:$idxen)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

//===----------------------------------------------------------------------===//
// SDNodes PatFrags for d16 loads
//===----------------------------------------------------------------------===//

class LoadD16Frag <SDPatternOperator op> : PatFrag<
  (ops node:$ptr, node:$tied_in),
  (op node:$ptr, node:$tied_in)> {
  let IsLoad = 1;
}

foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_d16_hi_#as : LoadD16Frag <SIload_d16_hi>;

def az_extloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_hi_#as : LoadD16Frag <SIload_d16_hi_i8> {
  let MemoryVT = i8;
}

def load_d16_lo_#as : LoadD16Frag <SIload_d16_lo>;

def az_extloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_u8> {
  let MemoryVT = i8;
}

def sextloadi8_d16_lo_#as : LoadD16Frag <SIload_d16_lo_i8> {
  let MemoryVT = i8;
}

} // End let AddressSpaces = ...
} // End foreach AddrSpace

def lshr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (srl $src0, $src1)
>;

def ashr_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (sra $src0, $src1)
>;

def lshl_rev : PatFrag <
  (ops node:$src1, node:$src0),
  (shl $src0, $src1)
>;

def add_ctpop : PatFrag <
  (ops node:$src0, node:$src1),
  (add (ctpop $src0), $src1)
>;

def xnor : PatFrag <
  (ops node:$src0, node:$src1),
  (not (xor $src0, $src1))
>;

foreach I = 1-4 in {
def shl#I#_add : PatFrag <
  (ops node:$src0, node:$src1),
  (add (shl_oneuse $src0, (i32 I)), $src1)> {
  // FIXME: Poor substitute for disabling pattern in SelectionDAG
  let PredicateCode = [{return false;}];
  let GISelPredicateCode = [{return true;}];
}
}

multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
                            SDTypeProfile tc = SDTAtomic2,
                            bit IsInt = 1> {

  def _glue : SDNode <
    !if(is_amdgpu, "AMDGPUISD", "ISD")#"::ATOMIC_"#op_name, tc,
    [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
  >;

  let AddressSpaces = StoreAddress_local.AddrSpaces in {

    if IsInt then {
      defm _local_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _local_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
      defm _local_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
    }
  }

  let AddressSpaces = StoreAddress_region.AddrSpaces in {
    if IsInt then {
      defm _region_m0 : binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op <!cast<SDNode>(NAME#"_glue")>;
    } else {
      defm _region_m0 : binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
      defm _region_m0 : noret_binary_atomic_op_fp <!cast<SDNode>(NAME#"_glue")>;
    }
  }
}

defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">;
defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">;
defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">;
defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">;
defm atomic_swap : SIAtomicM0Glue2 <"SWAP">;
defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 0, SDTAtomic2_f32, 0>;

def as_i1timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;

def as_i8imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
}]>;

def as_i8timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i16timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
}]>;

def as_i32imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i32timm : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

def as_i64imm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;

def cond_as_i32imm : SDNodeXForm<cond, [{
  return CurDAG->getTargetConstant(N->get(), SDLoc(N), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
    N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
  auto FI = cast<FrameIndexSDNode>(N);
  return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
}]>;

// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
    N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
  uint64_t Imm = N->getZExtValue();
  unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
  return CurDAG->getTargetConstant(Bit, SDLoc(N), MVT::i1);
}]>;

def SIMM16bit : TImmLeaf <i32,
  [{return isInt<16>(Imm) || isUInt<16>(Imm);}], as_i16timm
>;

def i64imm_32bit : ImmLeaf<i64, [{
  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;

def InlineImm64 : IntImmLeaf<i64, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP32 : FPImmLeaf<f32, [{
  return isInlineImmediate(Imm);
}]>;

def InlineImmFP64 : FPImmLeaf<f64, [{
  return isInlineImmediate(Imm);
}]>;


class VGPRImm <dag frag> : PatLeaf<frag, [{
  return isVGPRImm(N);
}]>;

def NegateImm : SDNodeXForm<imm, [{
  return CurDAG->getConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;

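// The two leaves below match immediates in [-64, -17], i.e. values whose
// negation (17..64) is itself an inline constant; NegateImm rewrites the
// operand so that, for example, an add of -20 can instead be selected as a
// subtract of the inline constant 20.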
// TODO: When FP inline imm values work?
def NegSubInlineConst32 : ImmLeaf<i32, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def NegSubInlineIntConst16 : ImmLeaf<i16, [{
  return Imm < -16 && Imm >= -64;
}], NegateImm>;

def ShiftAmt32Imm : ImmLeaf <i32, [{
  return Imm < 32;
}]>;

def fp16_zeros_high_16bits : PatLeaf<(f16 VGPR_32:$src), [{
  return fp16SrcZerosHighBits(N->getOpcode());
}]>;

def is_canonicalized : PatLeaf<(fAny srcvalue:$src), [{
  const SITargetLowering &Lowering =
    *static_cast<const SITargetLowering *>(getTargetLowering());
  return Lowering.isCanonicalized(*CurDAG, SDValue(N, 0));
}]> {
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());
    const MachineOperand &Dst = MI.getOperand(0);
    assert(Dst.isDef());
    return TLI->isCanonicalized(Dst.getReg(), MF);
  }];
}

//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_cpol : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12),
      SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  const bool Swizzle =
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::SWZ
                               : AMDGPU::CPol::SWZ_pregfx12);
  return CurDAG->getTargetConstant(Swizzle, SDLoc(N), MVT::i8);
}]>;

def extract_cpol_set_glc : SDNodeXForm<timm, [{
  const uint32_t cpol =
      N->getZExtValue() & (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12
                               ? AMDGPU::CPol::ALL
                               : AMDGPU::CPol::ALL_pregfx12);
  return CurDAG->getTargetConstant(cpol | AMDGPU::CPol::GLC, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//

def SOPPBrTarget : CustomOperand<OtherVT> {
  let PrintMethod = "printOperand";
  let EncoderMethod = "getSOPPBrEncoding";
  let DecoderMethod = "decodeSOPPBrTarget";
  let OperandType = "OPERAND_PCREL";
}

def si_ga : Operand<iPTR>;

def InterpSlot : CustomOperand<i32>;

// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
def InterpAttr : CustomOperand<i32>;

def InterpAttrChan : ImmOperand<i32>;

def SplitBarrier : ImmOperand<i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INLINE_SPLIT_BARRIER_INT32";
  let DecoderMethod = "decodeSplitBarrier";
  let PrintMethod = "printOperand";
}

def VReg32OrOffClass : AsmOperandClass {
  let Name = "VReg32OrOff";
  let ParserMethod = "parseVReg32OrOff";
}

def SendMsg : CustomOperand<i32>;

def Swizzle : CustomOperand<i16, 1>;

def Endpgm : CustomOperand<i16, 1>;

def SWaitCnt : CustomOperand<i32>;

def DepCtr : CustomOperand<i32>;

def SDelayALU : CustomOperand<i32>;

include "SIInstrFormats.td"
include "VIInstrFormats.td"

def BoolReg : AsmOperandClass {
  let Name = "BoolReg";
  let ParserMethod = "parseBoolReg";
  let RenderMethod = "addRegOperands";
}

class BoolRC : RegisterOperand<SReg_1> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def SSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

def VOPDstS64orS32 : BoolRC {
  let PrintMethod = "printVOPDst";
}

// SCSrc_i1 is the operand for pseudo instructions only.
// Boolean immediates shall not be exposed to codegen instructions.
def SCSrc_i1 : RegisterOperand<SReg_1_XEXEC> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_REG_IMM_INT32";
  let ParserMatchClass = BoolReg;
  let DecoderMethod = "decodeBoolReg";
}

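// Note: the boolean operands above are wave-size dependent: an i1 lane mask
// lives in an SGPR pair under isWave64 and in a single SGPR under isWave32,
// which is why SReg_1/SReg_1_XEXEC are used here and why the VOP destination
// operand is named VOPDstS64orS32.
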
//===----------------------------------------------------------------------===//
// ExpSrc* Special cases for exp src operands which are printed as
// "off" depending on en operand.
//===----------------------------------------------------------------------===//

def ExpSrc0 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc0";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc1 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc1";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc2 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc2";
  let ParserMatchClass = VReg32OrOffClass;
}

def ExpSrc3 : RegisterOperand<VGPR_32> {
  let PrintMethod = "printExpSrc3";
  let ParserMatchClass = VReg32OrOffClass;
}

class SDWASrc<ValueType vt> : RegisterOperand<VS_32> {
  let OperandNamespace = "AMDGPU";
  string Type = !if(vt.isFP, "FP", "INT");
  let OperandType = "OPERAND_REG_INLINE_C_"#Type#vt.Size;
  let DecoderMethod = "decodeSDWASrc"#vt.Size;
  let EncoderMethod = "getSDWASrcEncoding";
}

def SDWASrc_i32 : SDWASrc<i32>;
def SDWASrc_i16 : SDWASrc<i16>;
def SDWASrc_f32 : SDWASrc<f32>;
def SDWASrc_f16 : SDWASrc<f16>;

def SDWAVopcDst : BoolRC {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_SDWA_VOPC_DST";
  let EncoderMethod = "getSDWAVopcDstEncoding";
  let DecoderMethod = "decodeSDWAVopcDst";
  let PrintMethod = "printVOPDst";
}

class NamedIntOperand<ValueType Type, string Prefix, bit Optional = 1,
                      string name = NAME>
    : CustomOperand<Type, Optional, name> {
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
    "return Op.isImmTy(AMDGPUOperand::"#ImmTy#"); })";
  string Validator = "[](int64_t V) { return true; }";
  string ConvertMethod = "[](int64_t &V) { return "#Validator#"(V); }";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
}

class NamedBitOperand<string Id, string Name = NAME>
    : CustomOperand<i1, 1, Name> {
  let PredicateMethod = "isImmTy<AMDGPUOperand::"#ImmTy#">";
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
  let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
    "const MCSubtargetInfo &STI, raw_ostream &O) { "#
    "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
}

class DefaultOperand<CustomOperand Op, int Value>
    : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
      CustomOperandProps<1> {
  let ParserMatchClass = Op.ParserMatchClass;
  let PrintMethod = Op.PrintMethod;
}

class SDWAOperand<string Id, string Name = NAME>
    : CustomOperand<i32, 1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
}

class ArrayOperand0<string Id, string Name = NAME>
    : OperandWithDefaultOps<i32, (ops (i32 0))>,
      CustomOperandProps<1, Name> {
  let ParserMethod =
    "[this](OperandVector &Operands) -> ParseStatus { "#
    "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
    "AMDGPUOperand::"#ImmTy#"); }";
}

let ImmTy = "ImmTyOffset" in
def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
def Offset : NamedIntOperand<i32, "offset">;
let Validator = "isUInt<8>" in {
def Offset0 : NamedIntOperand<i8, "offset0">;
def Offset1 : NamedIntOperand<i8, "offset1">;
}

def gds : NamedBitOperand<"gds", "GDS">;

def omod : CustomOperand<i32, 1, "OModSI">;
def omod0 : DefaultOperand<omod, 0>;

// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def Clamp : NamedBitOperand<"clamp">;
def Clamp0 : DefaultOperand<Clamp, 0>;
def highmod : NamedBitOperand<"high", "High">;

def CPol : CustomOperand<i32, 1>;
def CPol_0 : DefaultOperand<CPol, 0>;
def CPol_GLC1 : DefaultOperand<CPol, 1>;
def CPol_GLC : ValuePredicatedOperand<CPol, "Op.getImm() & CPol::GLC">;
def CPol_NonGLC : ValuePredicatedOperand<CPol, "!(Op.getImm() & CPol::GLC)", 1>;
def CPol_GLC_WithDefault : DefaultOperand<CPol_GLC, !shl(1, CPolBit.GLC)>;
def CPol_NonGLC_WithDefault : DefaultOperand<CPol_NonGLC, 0>;

def TFE : NamedBitOperand<"tfe">;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
def A16 : NamedBitOperand<"a16">;
def D16 : NamedBitOperand<"d16">;
def LWE : NamedBitOperand<"lwe">;
def exp_compr : NamedBitOperand<"compr", "ExpCompr">;
def exp_vm : NamedBitOperand<"vm", "ExpVM">;

def FORMAT : CustomOperand<i8>;

def DMask : NamedIntOperand<i16, "dmask">;
def Dim : CustomOperand<i8>;

def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;

def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;

def IndexKey16bit : CustomOperand<i32, 1>;
def IndexKey8bit : CustomOperand<i32, 1>;

def dpp8 : CustomOperand<i32, 0, "DPP8">;
def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;

let DefaultValue = "0xf" in {
def DppRowMask : NamedIntOperand<i32, "row_mask">;
def DppBankMask : NamedIntOperand<i32, "bank_mask">;
}
def DppBoundCtrl : NamedIntOperand<i1, "bound_ctrl"> {
  let ConvertMethod = "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }";
}

let DecoderMethod = "decodeDpp8FI" in
def Dpp8FI : NamedIntOperand<i32, "fi", 1, "DppFI">;
def Dpp16FI : NamedIntOperand<i32, "fi", 1, "DppFI">;

def blgp : CustomOperand<i32, 1, "BLGP">;
def CBSZ : NamedIntOperand<i32, "cbsz"> {
  let Validator = "isUInt<3>";
}
def ABID : NamedIntOperand<i32, "abid"> {
  let Validator = "isUInt<4>";
}
def hwreg : CustomOperand<i32, 0, "Hwreg">;

def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;

def WaitVDST : NamedIntOperand<i8, "wait_vdst"> {
  let Validator = "isUInt<4>";
}
def WaitEXP : NamedIntOperand<i8, "wait_exp"> {
  let Validator = "isUInt<3>";
}
def WaitVAVDst : NamedIntOperand<i8, "wait_va_vdst"> {
  let Validator = "isUInt<4>";
}
def WaitVMVSrc : NamedIntOperand<i8, "wait_vm_vsrc"> {
  let Validator = "isUInt<1>";
}

def ByteSel : NamedIntOperand<i8, "byte_sel"> {
  let Validator = "isUInt<2>";
}

class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_KIMM"#vt.Size;
  let PrintMethod = "printU"#vt.Size#"ImmOperand";
  let DecoderMethod = "decodeOperand_KImmFP";
}

// 32-bit VALU immediate operand that uses the constant bus.
def KImmFP32 : KImmFPOperand<i32>;

// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
def KImmFP16 : KImmFPOperand<i16>;

class FPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}

class FPVCSrcInputModsMatchClass <int opSize> : FPInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithFP"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithFP"#opSize#"InputMods";
}

def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
def FPT16InputModsMatchClass : FPInputModsMatchClass<16> {
  let Name = "RegOrImmWithFPT16InputMods";
  let PredicateMethod = "isRegOrImmWithFPT16InputMods";
}
def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
def FP64InputModsMatchClass : FPInputModsMatchClass<64>;

class FP16VCSrcInputModsMatchClass<bit IsFake16>
    : FPVCSrcInputModsMatchClass<16> {
  let Name = !if(IsFake16, "RegOrInlineImmWithFPFake16InputMods",
                 "RegOrInlineImmWithFPT16InputMods");
  let PredicateMethod = "isRegOrInlineImmWithFP16InputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}
def FP32VCSrcInputModsMatchClass : FPVCSrcInputModsMatchClass<32>;

class InputMods <AsmOperandClass matchClass> : Operand <i32> {
  let OperandNamespace = "AMDGPU";
  let OperandType = "OPERAND_INPUT_MODS";
  let ParserMatchClass = matchClass;
}

class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
def FPT16InputMods : FPInputMods<FPT16InputModsMatchClass>;
def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;

class FP16VCSrcInputMods<bit IsFake16>
    : FPInputMods<FP16VCSrcInputModsMatchClass<IsFake16>>;
def FP32VCSrcInputMods : FPInputMods<FP32VCSrcInputModsMatchClass>;

class IntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "RegOrImmWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
class IntVCSrcInputModsMatchClass <int opSize> : IntInputModsMatchClass <opSize> {
  let Name = "RegOrInlineImmWithInt"#opSize#"InputMods";
  let PredicateMethod = "isRegOrInlineImmWithInt"#opSize#"InputMods";
}
def IntT16InputModsMatchClass : IntInputModsMatchClass<16> {
  let Name = "RegOrImmWithIntT16InputMods";
  let PredicateMethod = "isRegOrImmWithIntT16InputMods";
}
def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
def Int32VCSrcInputModsMatchClass : IntVCSrcInputModsMatchClass<32>;

class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}
def IntT16InputMods : IntInputMods<IntT16InputModsMatchClass>;
def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
def Int32VCSrcInputMods : IntInputMods<Int32VCSrcInputModsMatchClass>;

class OpSelModsMatchClass : AsmOperandClass {
  let Name = "OpSelMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
}

def IntOpSelModsMatchClass : OpSelModsMatchClass;
def IntOpSelMods : InputMods<IntOpSelModsMatchClass>;

class FPSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isSDWAFP"#opSize#"Operand";
}

def FP16SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<16>;
def FP32SDWAInputModsMatchClass : FPSDWAInputModsMatchClass<32>;

class FPSDWAInputMods <FPSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

def FP16SDWAInputMods : FPSDWAInputMods<FP16SDWAInputModsMatchClass>;
def FP32SDWAInputMods : FPSDWAInputMods<FP32SDWAInputModsMatchClass>;

def FPVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithFPInputMods";
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

class FPT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16VRegWithFPInputMods",
                 "T16VRegWithFPInputMods");
  let ParserMethod = "parseRegWithFPInputMods";
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

def FPVRegInputMods : InputMods <FPVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class FPT16VRegInputMods<bit IsFake16>
    : InputMods <FPT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class IntSDWAInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "SDWAWithInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithIntInputMods";
  let PredicateMethod = "isSDWAInt"#opSize#"Operand";
}

def Int16SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<16>;
def Int32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32>;
def Bin32SDWAInputModsMatchClass : IntSDWAInputModsMatchClass<32> {
  let Name = "SDWAWithBin32InputMods";
  let ParserMethod = "parseRegOrImm";
}

class IntSDWAInputMods <IntSDWAInputModsMatchClass matchClass> :
  InputMods <matchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def Int16SDWAInputMods : IntSDWAInputMods<Int16SDWAInputModsMatchClass>;
def Int32SDWAInputMods : IntSDWAInputMods<Int32SDWAInputModsMatchClass>;
def Bin32SDWAInputMods : IntSDWAInputMods<Bin32SDWAInputModsMatchClass>;

def IntVRegInputModsMatchClass : AsmOperandClass {
  let Name = "VRegWithIntInputMods";
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isVRegWithInputMods";
}

class IntT16VRegInputModsMatchClass<bit IsFake16> : AsmOperandClass {
  let Name = !if(IsFake16, "Fake16VRegWithIntInputMods",
                 "T16VRegWithIntInputMods");
  let ParserMethod = "parseRegWithIntInputMods";
  let PredicateMethod = "isT16VRegWithInputMods<" #
                        !if(IsFake16, "true", "false") # ">";
}

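// Note: the IsFake16 = 1 variants treat a 16-bit source as occupying a full
// 32-bit VGPR (the "fake16" mode), while IsFake16 = 0 selects the true16
// forms that address 16-bit register halves.
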
class IntT16VRegInputMods<bit IsFake16>
    : InputMods <IntT16VRegInputModsMatchClass<IsFake16>> {
  let PrintMethod = "printOperandAndIntInputMods";
}

def IntVRegInputMods : InputMods <IntVRegInputModsMatchClass> {
  let PrintMethod = "printOperandAndIntInputMods";
}

class PackedFPInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedFP"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImmWithFPInputMods";
  let PredicateMethod = "isPackedFP"#opSize#"InputMods";
}

class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
  let Name = "PackedInt"#opSize#"InputMods";
  let ParserMethod = "parseRegOrImm";
  let PredicateMethod = "isRegOrImm";
//  let PredicateMethod = "isPackedInt"#opSize#"InputMods";
}

def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;

class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
  let PrintMethod = "printOperandAndFPInputMods";
}

class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <matchClass> {
  //let PrintMethod = "printPackedIntInputMods";
}

def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;

//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//

def DS1Addr1Offset : ComplexPattern<iPTR, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<iPTR, 3, "SelectDS64Bit4ByteAligned">;
def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">;

def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;

// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;

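// Note: each of these ComplexPatterns matches a source together with its
// modifiers; for example (VOP3Mods f32:$src0, i32:$src0_modifiers) binds
// $src0 to the value and $src0_modifiers to an immediate built from the
// SRCMODS bits (NEG, ABS, ...) defined later in this file.
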
// VOP3 modifiers used for instructions that do not read canonicalized
// floating point values (i.e. integer operations with FP source
// modifiers).
def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
                                               "SelectVOP3ModsNonCanonicalizing">;

def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
def WMMAModsF16Neg : ComplexPattern<untyped, 2, "SelectWMMAModsF16Neg">;
def WMMAModsF16NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF16NegAbs">;
def WMMAVISrc : ComplexPattern<untyped, 1, "SelectWMMAVISrc">;
def SWMMACIndex8 : ComplexPattern<untyped, 2, "SelectSWMMACIndex8">;
def SWMMACIndex16 : ComplexPattern<untyped, 2, "SelectSWMMACIndex16">;

def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;

def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;

def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;

def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;

//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//

def SIOperand {
  int ZERO = 0x80;
  int VCC = 0x6A;
  int FLAT_SCR = 0x68;
}

// This should be kept in sync with the SISrcMods enum.
def SRCMODS {
  int NONE = 0;
  int NEG = 1;
  int ABS = 2;
  int NEG_ABS = 3;

  int NEG_HI = ABS;
  int OP_SEL_0 = 4;
  int OP_SEL_1 = 8;
  int DST_OP_SEL = 8;
}

def DSTCLAMP {
  int NONE = 0;
  int ENABLE = 1;
}

def DSTOMOD {
  int NONE = 0;
}

def HWREG {
  int MODE = 1;
  int STATUS = 2;
  int TRAPSTS = 3;
  int HW_ID = 4;
  int GPR_ALLOC = 5;
  int LDS_ALLOC = 6;
  int IB_STS = 7;
  int MEM_BASES = 15;
  int TBA_LO = 16;
  int TBA_HI = 17;
  int TMA_LO = 18;
  int TMA_HI = 19;
  int FLAT_SCR_LO = 20;
  int FLAT_SCR_HI = 21;
  int XNACK_MASK = 22;
  int POPS_PACKER = 25;
  int SHADER_CYCLES = 29;
}

class getHwRegImm<int Reg, int Offset = 0, int Size = 32> {
  int ret = !and(!or(Reg,
                     !shl(Offset, 6),
                     !shl(!add(Size, -1), 11)), 65535);
}

//===----------------------------------------------------------------------===//
//
// SI Instruction multiclass helpers.
//
// Instructions with _32 take 32-bit operands.
// Instructions with _64 take 64-bit operands.
//
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
// encoding is the standard encoding, but instructions that make use of
// any of the instruction modifiers must use the 64-bit encoding.
//
// Instructions with _e32 use the 32-bit encoding.
// Instructions with _e64 use the 64-bit encoding.
//
//===----------------------------------------------------------------------===//
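
// For example, v_add_f32_e32 v0, v1, v2 uses the compact VOP2 encoding,
// while source modifiers, clamp or omod force the VOP3 form, e.g.
// v_add_f32_e64 v0, -v1, |v2| clamp.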

class SIMCInstr <string pseudo, int subtarget> {
  string PseudoInstr = pseudo;
  int Subtarget = subtarget;
}

//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//

class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
  int ret =
    !if (!eq(Src0.Value, untyped.Value),     0,
      !if (!eq(Src1.Value, untyped.Value),   1,   // VOP1
        !if (!eq(Src2.Value, untyped.Value), 2,   // VOP2
                                             3))); // VOP3
}

// Returns the register class to use for the destination of VOP[123C]
// instructions for the given VT.
class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
  defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
                                   VOPDstOperand_t16Lo128),
                    VOPDstOperand<VGPR_32>);
  RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
                              !eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
                              !eq(VT.Size, 64)  : VOPDstOperand<VReg_64>,
                              !eq(VT.Size, 32)  : VOPDstOperand<VGPR_32>,
                              !eq(VT.Size, 16)  : op16,
                              1                 : VOPDstS64orS32); // else VT == i1
}

class getVALUDstForVT_fake16<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
                          !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
                            !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
                              !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32_Lo128>,
                                VOPDstS64orS32)))); // else VT == i1
}

// Returns the register class to use for the destination of VOP[12C]
// instructions with SDWA extension
class getSDWADstForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 1),
                            SDWAVopcDst,             // VOPC
                            VOPDstOperand<VGPR_32>); // VOP1/2 32-bit dst
}

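// Worked example: getVALUDstForVT<f32>.ret is VOPDstOperand<VGPR_32>, while
// getVALUDstForVT<i1>.ret is VOPDstS64orS32, the wave-size dependent SGPR
// destination used for comparison results and carry-outs; 16-bit types pick
// one of the True16 destination operands via op16 above.
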
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT, bit IsTrue16, bit IsFake16 = 1> {
  RegisterOperand ret =
    !cond(!eq(VT, i64)    : VSrc_b64,
          !eq(VT, f64)    : VSrc_f64,
          !eq(VT, i32)    : VSrc_b32,
          !eq(VT, f32)    : VSrc_f32,
          !eq(VT, i16)    : !if(IsTrue16,
                                !if(IsFake16, VSrcFake16_b16_Lo128, VSrcT_b16_Lo128),
                                VSrc_b16),
          !eq(VT, f16)    : !if(IsTrue16,
                                !if(IsFake16, VSrcFake16_f16_Lo128, VSrcT_f16_Lo128),
                                VSrc_f16),
          !eq(VT, bf16)   : !if(IsTrue16,
                                !if(IsFake16, VSrcFake16_bf16_Lo128, VSrcT_bf16_Lo128),
                                VSrc_bf16),
          !eq(VT, v2i16)  : VSrc_v2b16,
          !eq(VT, v2f16)  : VSrc_v2f16,
          !eq(VT, v2bf16) : VSrc_v2bf16,
          !eq(VT, v4f16)  : AVSrc_64,
          !eq(VT, v4bf16) : AVSrc_64,
          1               : VSrc_b32);
}

class getSOPSrcForVT<ValueType VT> {
  RegisterOperand ret = !if(!eq(VT.Size, 64), SSrc_b64, SSrc_b32);
}

// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
  RegisterOperand ret =
    !cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
          !eq(VT.Size, 96)  : RegisterOperand<VReg_96>,
          !eq(VT.Size, 64)  : RegisterOperand<VReg_64>,
          !eq(VT.Size, 48)  : RegisterOperand<VReg_64>,
          !eq(VT.Size, 16)  : !if(IsTrue16,
                                  !if(IsFake16, VGPRSrc_32_Lo128, VGPRSrc_16_Lo128),
                                  RegisterOperand<VGPR_32>),
          1                 : RegisterOperand<VGPR_32>);
}

class getSDWASrcForVT <ValueType VT> {
  RegisterOperand retFlt = !if(!eq(VT.Size, 16), SDWASrc_f16, SDWASrc_f32);
  RegisterOperand retInt = !if(!eq(VT.Size, 16), SDWASrc_i16, SDWASrc_i32);
  RegisterOperand ret = !if(VT.isFP, retFlt, retInt);
}

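// Reminder, roughly, on the operand families used here and below: VSrc_*
// accept VGPRs, SGPRs and immediates (including a literal), VCSrc_* accept
// inline constants but no literal, VRegSrc_* and the getVregSrcForVT results
// are VGPR-only, and AVSrc_* additionally allow AGPRs.
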
1591class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> { 1592 RegisterOperand ret = 1593 !cond(!eq(VT, f64) : VSrc_f64, 1594 !eq(VT, f32) : VSrc_f32, 1595 !eq(VT, f16) : !if(IsTrue16, VSrcT_f16, VSrc_f16), 1596 !eq(VT, bf16) : !if(IsTrue16, VSrcT_bf16, VSrc_bf16), 1597 !eq(VT, i16) : !if(IsTrue16, VSrcT_b16, VSrc_b16), 1598 !eq(VT, i1) : SSrc_i1, 1599 !eq(VT, v2f32) : VSrc_v2f32, 1600 !eq(VT, v2i32) : VSrc_v2b32, 1601 !eq(VT, v2f16) : VSrc_v2f16, 1602 !eq(VT, v2bf16) : VSrc_v2bf16, 1603 !eq(VT, v2i16) : VSrc_v2b16, 1604 !eq(VT, v4f16) : AVSrc_64, 1605 !eq(VT, v4bf16) : AVSrc_64, 1606 !eq(VT.Size, 128) : VRegSrc_128, 1607 !eq(VT.Size, 96) : VRegSrc_96, 1608 !eq(VT.Size, 64) : VSrc_b64, 1609 1 : VSrc_b32); 1610} 1611 1612// Src2 of VOP3 DPP instructions cannot be a literal 1613class getVOP3DPPSrcForVT<ValueType VT> { 1614 RegisterOperand ret = 1615 !cond(!eq(VT, i1) : SSrc_i1, 1616 !eq(VT, i16) : VCSrc_b16, 1617 !eq(VT, f16) : VCSrc_f16, 1618 !eq(VT, bf16) : VCSrc_bf16, 1619 !eq(VT, v2i16) : VCSrc_v2b16, 1620 !eq(VT, v2f16) : VCSrc_v2f16, 1621 !eq(VT, v2bf16) : VCSrc_v2bf16, 1622 !eq(VT, f32) : VCSrc_f32, 1623 1 : VCSrc_b32); 1624} 1625 1626// Float or packed int 1627class isModifierType<ValueType SrcVT> { 1628 bit ret = !or(!eq(SrcVT.Value, f16.Value), 1629 !eq(SrcVT.Value, bf16.Value), 1630 !eq(SrcVT.Value, f32.Value), 1631 !eq(SrcVT.Value, f64.Value), 1632 !eq(SrcVT.Value, v2f16.Value), 1633 !eq(SrcVT.Value, v2i16.Value), 1634 !eq(SrcVT.Value, v2bf16.Value), 1635 !eq(SrcVT.Value, v2f32.Value), 1636 !eq(SrcVT.Value, v2i32.Value), 1637 !eq(SrcVT.Value, v4f16.Value), 1638 !eq(SrcVT.Value, v4i16.Value), 1639 !eq(SrcVT.Value, v4bf16.Value), 1640 !eq(SrcVT.Value, v4f32.Value), 1641 !eq(SrcVT.Value, v4i32.Value), 1642 !eq(SrcVT.Value, v8f16.Value), 1643 !eq(SrcVT.Value, v8i16.Value), 1644 !eq(SrcVT.Value, v8bf16.Value), 1645 !eq(SrcVT.Value, v8f32.Value), 1646 !eq(SrcVT.Value, v8i32.Value), 1647 !eq(SrcVT.Value, v16f16.Value), 1648 !eq(SrcVT.Value, v16i16.Value), 1649 !eq(SrcVT.Value, v16bf16.Value)); 1650} 1651 1652// Return type of input modifiers operand for specified input operand 1653class getSrcMod <ValueType VT, bit IsTrue16 = 0> { 1654 Operand ret = !if(!eq(VT.Size, 64), 1655 !if(VT.isFP, FP64InputMods, Int64InputMods), 1656 !if(!eq(VT.Size, 16), 1657 !if(VT.isFP, !if(IsTrue16, FPT16InputMods, FP16InputMods), 1658 !if(IsTrue16, IntT16InputMods, IntOpSelMods)), 1659 !if(VT.isFP, FP32InputMods, Int32InputMods))); 1660} 1661 1662class getOpSelMod <ValueType VT> { 1663 Operand ret = !cond(!eq(VT, f16) : FP16InputMods, 1664 !eq(VT, bf16) : FP16InputMods, 1665 !eq(VT, v2f16) : PackedF16InputMods, 1666 !eq(VT, v2bf16) : PackedF16InputMods, 1667 1 : IntOpSelMods); 1668} 1669 1670// Return type of input modifiers operand specified input operand for DPP 1671class getSrcModDPP <ValueType VT> { 1672 Operand ret = !if(VT.isFP, FPVRegInputMods, IntVRegInputMods); 1673} 1674 1675class getSrcModDPP_t16 <ValueType VT, bit IsFake16 = 1> { 1676 Operand ret = 1677 !if (VT.isFP, 1678 !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), 1679 FPT16VRegInputMods<IsFake16>, FPVRegInputMods), 1680 !if (!eq(VT.Value, i16.Value), 1681 IntT16VRegInputMods<IsFake16>, IntVRegInputMods)); 1682} 1683 1684// Return type of input modifiers operand for specified input operand for DPP 1685class getSrcModVOP3DPP <ValueType VT, bit IsFake16 = 1> { 1686 Operand ret = 1687 !if (VT.isFP, 1688 !if (!or(!eq(VT.Value, f16.Value), !eq(VT.Value, bf16.Value)), 1689 FP16VCSrcInputMods<IsFake16>, FP32VCSrcInputMods), 1690 
Int32VCSrcInputMods); 1691} 1692 1693// Return type of input modifiers operand specified input operand for SDWA 1694class getSrcModSDWA <ValueType VT> { 1695 Operand ret = !if(!eq(VT.Value, f16.Value), FP16SDWAInputMods, 1696 !if(!eq(VT.Value, f32.Value), FP32SDWAInputMods, 1697 !if(!eq(VT.Value, i16.Value), Int16SDWAInputMods, 1698 !if(!eq(VT.Value, bf16.Value), FP16SDWAInputMods, 1699 Int32SDWAInputMods)))); 1700} 1701 1702// Returns the input arguments for VOP[12C] instructions for the given SrcVT. 1703class getIns32 <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs> { 1704 dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 1705 !if(!eq(NumSrcArgs, 2), (ins Src0RC:$src0, Src1RC:$src1), // VOP2 1706 (ins))); 1707} 1708 1709// Returns the input arguments for VOP3 instructions for the given SrcVT. 1710class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, 1711 RegisterOperand Src2RC, int NumSrcArgs, 1712 bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, 1713 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { 1714 1715 dag ret = 1716 !if (!eq(NumSrcArgs, 0), 1717 // VOP1 without input operands (V_NOP, V_CLREXCP) 1718 (ins), 1719 /* else */ 1720 !if (!eq(NumSrcArgs, 1), 1721 !if (HasModifiers, 1722 // VOP1 with modifiers 1723 !if(HasOMod, 1724 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1725 Clamp0:$clamp, omod0:$omod), 1726 !if (HasClamp, 1727 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, Clamp0:$clamp), 1728 (ins Src0Mod:$src0_modifiers, Src0RC:$src0))) 1729 /* else */, 1730 // VOP1 without modifiers 1731 !if (HasClamp, 1732 (ins Src0RC:$src0, Clamp0:$clamp), 1733 (ins Src0RC:$src0)) 1734 /* endif */ ), 1735 !if (!eq(NumSrcArgs, 2), 1736 !if (HasModifiers, 1737 // VOP 2 with modifiers 1738 !if(HasOMod, 1739 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1740 Src1Mod:$src1_modifiers, Src1RC:$src1, 1741 Clamp0:$clamp, omod0:$omod), 1742 !con((ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1743 Src1Mod:$src1_modifiers, Src1RC:$src1), 1744 !if(HasClamp, (ins Clamp0:$clamp), (ins)))) 1745 /* else */, 1746 // VOP2 without modifiers 1747 !if (HasClamp, 1748 (ins Src0RC:$src0, Src1RC:$src1, Clamp0:$clamp), 1749 (ins Src0RC:$src0, Src1RC:$src1)) 1750 1751 /* endif */ ) 1752 /* NumSrcArgs == 3 */, 1753 !if (HasModifiers, 1754 !if (HasSrc2Mods, 1755 // VOP3 with modifiers 1756 !if (HasOMod, 1757 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1758 Src1Mod:$src1_modifiers, Src1RC:$src1, 1759 Src2Mod:$src2_modifiers, Src2RC:$src2, 1760 Clamp0:$clamp, omod0:$omod), 1761 !if (HasClamp, 1762 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1763 Src1Mod:$src1_modifiers, Src1RC:$src1, 1764 Src2Mod:$src2_modifiers, Src2RC:$src2, 1765 Clamp0:$clamp), 1766 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1767 Src1Mod:$src1_modifiers, Src1RC:$src1, 1768 Src2Mod:$src2_modifiers, Src2RC:$src2))), 1769 // VOP3 with modifiers except src2 1770 !if (HasOMod, 1771 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1772 Src1Mod:$src1_modifiers, Src1RC:$src1, 1773 Src2RC:$src2, Clamp0:$clamp, omod0:$omod), 1774 !if (HasClamp, 1775 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1776 Src1Mod:$src1_modifiers, Src1RC:$src1, 1777 Src2RC:$src2, Clamp0:$clamp), 1778 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1779 Src1Mod:$src1_modifiers, Src1RC:$src1, 1780 Src2RC:$src2)))) 1781 /* else */, 1782 // VOP3 without modifiers 1783 !if (HasClamp, 1784 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2, Clamp0:$clamp), 1785 (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)) 1786 /* endif */ )))); 1787} 1788 1789class 
getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
               RegisterOperand Src2RC, int NumSrcArgs,
               bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
               Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
  // getIns64 handles clamp and omod. There is an implicit mutex between
  // vop3p and omod.
  dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
                HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
                Src0Mod, Src1Mod, Src2Mod>.ret;
  dag opsel = (ins op_sel0:$op_sel);
  dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}

class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
                   Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
                 HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
                 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;

  dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
  dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);

  dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
  dag ret = !con(base, vop3pFields);
}

class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
                       RegisterOperand Src2RC, int NumSrcArgs,
                       bit HasClamp, bit HasOMod,
                       Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
  dag ret = getInsVOP3Base<Src0RC, Src1RC,
                Src2RC, NumSrcArgs,
                HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
                Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
}

class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                     RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                     Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld> {
  dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins ),
                !con(
                  !if(HasOld, (ins OldRC:$old), (ins)),
                  !if (!eq(NumSrcArgs, 1),
                    !if (HasModifiers,
                      // VOP1_DPP with modifiers
                      (ins Src0Mod:$src0_modifiers, Src0RC:$src0)
                    /* else */,
                      // VOP1_DPP without modifiers
                      (ins Src0RC:$src0)
                    /* endif */),
                    !if (!eq(NumSrcArgs, 2),
                      !if (HasModifiers,
                        // VOP2_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1)
                      /* else */,
                        // VOP2_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1)
                      )
                    /* NumSrcArgs == 3, VOP3 */,
                      !if (HasModifiers,
                        // VOP3_DPP with modifiers
                        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                             Src1Mod:$src1_modifiers, Src1RC:$src1,
                             Src2Mod:$src2_modifiers, Src2RC:$src2)
                      /* else */,
                        // VOP3_DPP without modifiers
                        (ins Src0RC:$src0, Src1RC:$src1,
                             Src2RC:$src2)
                      )
                    )
                  )
                )
            );
}

class getInsDPP <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> {
  dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs,
                               HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret,
                 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask,
                      DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl));
}

class getInsDPP16 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
                   RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers,
                   Operand Src0Mod,
Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 1879 dag ret = !con(getInsDPP<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1880 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 1881 (ins Dpp16FI:$fi)); 1882} 1883 1884class getInsDPP8 <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC, 1885 RegisterOperand Src2RC, int NumSrcArgs, bit HasModifiers, 1886 Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOld = 1> { 1887 dag ret = !con(getInsDPPBase<OldRC, Src0RC, Src1RC, Src2RC, NumSrcArgs, 1888 HasModifiers, Src0Mod, Src1Mod, Src2Mod, HasOld>.ret, 1889 (ins dpp8:$dpp8, Dpp8FI:$fi)); 1890} 1891 1892class getInsVOP3DPPBase<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld> { 1893 dag old = ( ins OldRC:$old ); 1894 dag base = VOP3Base; 1895 dag ret = !con( 1896 !if(!and(HasOld,!ne(NumSrcArgs, 0)), old, (ins)), 1897 base 1898 ); 1899} 1900 1901class getInsVOP3DPP<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1902 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1903 (ins dpp_ctrl:$dpp_ctrl, DppRowMask:$row_mask, 1904 DppBankMask:$bank_mask, DppBoundCtrl:$bound_ctrl)); 1905} 1906 1907class getInsVOP3DPP16<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1908 dag ret = !con(getInsVOP3DPP<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1909 (ins Dpp16FI:$fi)); 1910} 1911 1912class getInsVOP3DPP8<dag VOP3Base, RegisterOperand OldRC, int NumSrcArgs, bit HasOld = 1> { 1913 dag ret = !con(getInsVOP3DPPBase<VOP3Base,OldRC,NumSrcArgs,HasOld>.ret, 1914 (ins dpp8:$dpp8, Dpp8FI:$fi)); 1915} 1916 1917// Ins for SDWA 1918class getInsSDWA <RegisterOperand Src0RC, RegisterOperand Src1RC, int NumSrcArgs, 1919 bit HasSDWAOMod, Operand Src0Mod, Operand Src1Mod, 1920 ValueType DstVT> { 1921 1922 dag ret = !if(!eq(NumSrcArgs, 0), 1923 // VOP1 without input operands (V_NOP) 1924 (ins), 1925 !if(!eq(NumSrcArgs, 1), 1926 // VOP1 1927 !if(!not(HasSDWAOMod), 1928 // VOP1_SDWA without omod 1929 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1930 Clamp:$clamp, 1931 dst_sel:$dst_sel, dst_unused:$dst_unused, 1932 src0_sel:$src0_sel), 1933 // VOP1_SDWA with omod 1934 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1935 Clamp:$clamp, omod:$omod, 1936 dst_sel:$dst_sel, dst_unused:$dst_unused, 1937 src0_sel:$src0_sel)), 1938 !if(!eq(NumSrcArgs, 2), 1939 !if(!eq(DstVT.Size, 1), 1940 // VOPC_SDWA 1941 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1942 Src1Mod:$src1_modifiers, Src1RC:$src1, 1943 Clamp:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel), 1944 // VOP2_SDWA 1945 !if(!not(HasSDWAOMod), 1946 // VOP2_SDWA without omod 1947 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1948 Src1Mod:$src1_modifiers, Src1RC:$src1, 1949 Clamp:$clamp, 1950 dst_sel:$dst_sel, dst_unused:$dst_unused, 1951 src0_sel:$src0_sel, src1_sel:$src1_sel), 1952 // VOP2_SDWA with omod 1953 (ins Src0Mod:$src0_modifiers, Src0RC:$src0, 1954 Src1Mod:$src1_modifiers, Src1RC:$src1, 1955 Clamp:$clamp, omod:$omod, 1956 dst_sel:$dst_sel, dst_unused:$dst_unused, 1957 src0_sel:$src0_sel, src1_sel:$src1_sel))), 1958 (ins)/* endif */))); 1959} 1960 1961// Outs for DPP 1962class getOutsDPP <bit HasDst, ValueType DstVT, RegisterOperand DstRCDPP> { 1963 dag ret = !if(HasDst, 1964 !if(!eq(DstVT.Size, 1), 1965 (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions 1966 (outs DstRCDPP:$vdst)), 1967 (outs)); // V_NOP 1968} 1969 1970// Outs for SDWA 1971class getOutsSDWA <bit HasDst, ValueType DstVT, RegisterOperand DstRCSDWA> { 1972 dag ret = !if(HasDst, 1973 
!if(!eq(DstVT.Size, 1), 1974 (outs DstRCSDWA:$sdst), 1975 (outs DstRCSDWA:$vdst)), 1976 (outs)); // V_NOP 1977} 1978 1979// Returns the assembly string for the inputs and outputs of a VOP[12C] 1980// instruction. 1981class getAsm32 <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 1982 string dst = !if(!eq(DstVT.Size, 1), "$sdst", "$vdst"); // use $sdst for VOPC 1983 string src0 = ", $src0"; 1984 string src1 = ", $src1"; 1985 string src2 = ", $src2"; 1986 string ret = !if(HasDst, dst, "") # 1987 !if(!eq(NumSrcArgs, 1), src0, "") # 1988 !if(!eq(NumSrcArgs, 2), src0#src1, "") # 1989 !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); 1990} 1991 1992class getAsmVOPDPart <int NumSrcArgs, string XorY> { 1993 string dst = "$vdst" # XorY; 1994 string src0 = ", $src0" # XorY; 1995 string src1 = ", $vsrc1" # XorY; 1996 string ret = dst # 1997 !if(!ge(NumSrcArgs, 1), src0, "") # 1998 !if(!ge(NumSrcArgs, 2), src1, ""); 1999} 2000 2001// Returns the assembly string for the inputs and outputs of a VOP3P 2002// instruction. 2003class getAsmVOP3P <int NumSrcArgs, bit HasModifiers, 2004 bit HasClamp, bit HasOpSel> { 2005 string dst = "$vdst"; 2006 string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2007 string src1 = !if(!eq(NumSrcArgs, 1), "", 2008 !if(!eq(NumSrcArgs, 2), " $src1", 2009 " $src1,")); 2010 string src2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2011 2012 string mods = !if(HasModifiers, "$neg_lo$neg_hi", ""); 2013 string clamp = !if(HasClamp, "$clamp", ""); 2014 string opsel = !if(HasOpSel, "$op_sel$op_sel_hi", ""); 2015 2016 // Each modifier is printed as an array of bits for each operand, so 2017 // all operands are printed as part of src0_modifiers. 2018 string ret = dst#", "#src0#src1#src2#opsel#mods#clamp; 2019} 2020 2021class getAsmVOP3OpSel <int NumSrcArgs, 2022 bit HasClamp, 2023 bit HasOMod, 2024 bit Src0HasMods, 2025 bit Src1HasMods, 2026 bit Src2HasMods> { 2027 string dst = "$vdst"; 2028 2029 string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2030 string isrc1 = !if(!eq(NumSrcArgs, 1), "", 2031 !if(!eq(NumSrcArgs, 2), " $src1", 2032 " $src1,")); 2033 string isrc2 = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2034 2035 string fsrc0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2036 string fsrc1 = !if(!eq(NumSrcArgs, 1), "", 2037 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2038 " $src1_modifiers,")); 2039 string fsrc2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2040 2041 string src0 = !if(Src0HasMods, fsrc0, isrc0); 2042 string src1 = !if(Src1HasMods, fsrc1, isrc1); 2043 string src2 = !if(Src2HasMods, fsrc2, isrc2); 2044 2045 string clamp = !if(HasClamp, "$clamp", ""); 2046 string omod = !if(HasOMod, "$omod", ""); 2047 string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod; 2048} 2049 2050class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2051 string dst = !if(HasDst, 2052 !if(!eq(DstVT.Size, 1), 2053 "$sdst", 2054 "$vdst"), 2055 ""); // use $sdst for VOPC 2056 string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2057 string src1 = !if(!eq(NumSrcArgs, 1), "", 2058 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2059 " $src1_modifiers,")); 2060 string args = !if(!not(HasModifiers), 2061 getAsm32<0, NumSrcArgs, DstVT>.ret, 2062 ", "#src0#src1); 2063 string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2064} 2065 2066class getAsmDPP16 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> { 2067 string ret = getAsmDPP<HasDst, NumSrcArgs, HasModifiers, 
DstVT>.ret#"$fi"; 2068} 2069 2070class getAsmDPP8 <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> 2071 : getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>{ 2072 let ret = dst#args#" $dpp8$fi"; 2073} 2074 2075class getAsmVOP3Base <int NumSrcArgs, bit HasDst, bit HasClamp, 2076 bit HasOpSel, bit HasOMod, bit IsVOP3P, 2077 bit HasModifiers, bit Src0HasMods, 2078 bit Src1HasMods, bit Src2HasMods, ValueType DstVT = i32, 2079 bit HasByteSel = 0> { 2080 string dst = !if(HasDst, 2081 !if(!eq(DstVT.Size, 1), 2082 "$sdst", 2083 "$vdst"), 2084 ""); // use $sdst for VOPC 2085 string src0nomods = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,"); 2086 string src1nomods = !if(!eq(NumSrcArgs, 1), "", 2087 !if(!eq(NumSrcArgs, 2), " $src1", 2088 " $src1,")); 2089 string src2nomods = !if(!eq(NumSrcArgs, 3), " $src2", ""); 2090 2091 string src0mods = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); 2092 string src1mods = !if(!eq(NumSrcArgs, 1), "", 2093 !if(!eq(NumSrcArgs, 2), " $src1_modifiers", 2094 " $src1_modifiers,")); 2095 string src2mods = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); 2096 2097 string src0 = !if(Src0HasMods, src0mods, src0nomods); 2098 string src1 = !if(Src1HasMods, src1mods, src1nomods); 2099 string src2 = !if(Src2HasMods, src2mods, src2nomods); 2100 string opsel = !if(HasOpSel, "$op_sel", ""); 2101 string bytesel = !if(HasByteSel, "$byte_sel", ""); 2102 string 3PMods = !if(IsVOP3P, 2103 !if(HasOpSel, "$op_sel_hi", "") 2104 #!if(HasModifiers, "$neg_lo$neg_hi", ""), 2105 ""); 2106 string clamp = !if(HasClamp, "$clamp", ""); 2107 string omod = !if(HasOMod, "$omod", ""); 2108 2109 string ret = dst#!if(!gt(NumSrcArgs,0),", "#src0#src1#src2#opsel#bytesel#3PMods#clamp#omod, ""); 2110 2111} 2112 2113class getAsmVOP3DPP<string base> { 2114 string ret = base # " $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; 2115} 2116 2117class getAsmVOP3DPP16<string base> { 2118 string ret = getAsmVOP3DPP<base>.ret # "$fi"; 2119} 2120 2121class getAsmVOP3DPP8<string base> { 2122 string ret = base # " $dpp8$fi"; 2123} 2124 2125 2126class getAsmSDWA <bit HasDst, int NumSrcArgs, ValueType DstVT = i32> { 2127 string dst = !if(HasDst, 2128 !if(!eq(DstVT.Size, 1), 2129 " vcc", // use vcc token as dst for VOPC instructions 2130 "$vdst"), 2131 ""); 2132 string src0 = "$src0_modifiers"; 2133 string src1 = "$src1_modifiers"; 2134 string args = !if(!eq(NumSrcArgs, 0), 2135 "", 2136 !if(!eq(NumSrcArgs, 1), 2137 ", "#src0#"$clamp", 2138 ", "#src0#", "#src1#"$clamp" 2139 ) 2140 ); 2141 string sdwa = !if(!eq(NumSrcArgs, 0), 2142 "", 2143 !if(!eq(NumSrcArgs, 1), 2144 " $dst_sel $dst_unused $src0_sel", 2145 !if(!eq(DstVT.Size, 1), 2146 " $src0_sel $src1_sel", // No dst_sel and dst_unused for VOPC 2147 " $dst_sel $dst_unused $src0_sel $src1_sel" 2148 ) 2149 ) 2150 ); 2151 string ret = dst#args#sdwa; 2152} 2153 2154class getAsmSDWA9 <bit HasDst, bit HasOMod, int NumSrcArgs, 2155 ValueType DstVT = i32> { 2156 string dst = !if(HasDst, 2157 !if(!eq(DstVT.Size, 1), 2158 "$sdst", // VOPC 2159 "$vdst"), // VOP1/2 2160 ""); 2161 string src0 = "$src0_modifiers"; 2162 string src1 = "$src1_modifiers"; 2163 string out_mods = !if(!not(HasOMod), "$clamp", "$clamp$omod"); 2164 string args = !if(!eq(NumSrcArgs, 0), "", 2165 !if(!eq(NumSrcArgs, 1), 2166 ", "#src0, 2167 ", "#src0#", "#src1 2168 ) 2169 ); 2170 string sdwa = !if(!eq(NumSrcArgs, 0), "", 2171 !if(!eq(NumSrcArgs, 1), 2172 out_mods#" $dst_sel $dst_unused $src0_sel", 2173 !if(!eq(DstVT.Size, 1), 2174 " $src0_sel $src1_sel", // No dst_sel, dst_unused and 
output modifiers for VOPC
                     out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel"
                   )
                 )
             );
  string ret = dst#args#sdwa;
}

class getHas64BitOps <int NumSrcArgs, ValueType DstVT, ValueType Src0VT,
                      ValueType Src1VT> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0,
                !if(!eq(DstVT.Size, 64),
                    1,
                    !if(!eq(Src0VT.Size, 64),
                        1,
                        !if(!eq(Src1VT.Size, 64),
                            1,
                            0
                        )
                    )
                )
            );
}

class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                  ValueType Src1VT = i32> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No SDWA for VOP3
                !if(!eq(DstVT.Size, 64),
                    0, // 64-bit dst - No SDWA for 64-bit operands
                    !if(!eq(Src0VT.Size, 64),
                        0, // 64-bit src0
                        !if(!eq(Src1VT.Size, 64),
                            0, // 64-bit src1
                            1
                        )
                    )
                )
            );
}

class getHasDPP <int NumSrcArgs> {
  bit ret = !if(!eq(NumSrcArgs, 3),
                0, // NumSrcArgs == 3 - No DPP for VOP3
                1);
}

class getHasExt32BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 !not(getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret));
}

class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                         ValueType Src1VT = i32> {
  bit ret = !and(getHasDPP<NumSrcArgs>.ret,
                 getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Returns true if the instruction supports DPP or SDWA.
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
                 ValueType Src1VT = i32> {
  bit ret = !or(getHasDPP<NumSrcArgs>.ret,
                getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}

// Return an AGPR+VGPR operand class for the given VGPR register class.
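// For example, a 64-bit register class selects AVLdSt_64; register sizes
// without an entry in the !cond below are not handled by this helper.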
2242class getLdStRegisterOperand<RegisterClass RC> { 2243 RegisterOperand ret = 2244 !cond(!eq(RC.Size, 32) : AVLdSt_32, 2245 !eq(RC.Size, 64) : AVLdSt_64, 2246 !eq(RC.Size, 96) : AVLdSt_96, 2247 !eq(RC.Size, 128) : AVLdSt_128, 2248 !eq(RC.Size, 160) : AVLdSt_160, 2249 !eq(RC.Size, 1024) : AVLdSt_1024); 2250} 2251 2252class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32, 2253 ValueType Src1VT = i32, ValueType Src2VT = i32> { 2254 bit ret = !if(!eq(DstVT.Size, 64), 2255 0, // 64-bit dst No DPP for 64-bit operands 2256 !if(!eq(Src0VT.Size, 64), 2257 0, // 64-bit src0 2258 !if(!eq(Src1VT.Size, 64), 2259 0, // 64-bit src1 2260 !if(!eq(Src2VT.Size, 64), 2261 0, // 64-bit src2 2262 1 2263 ) 2264 ) 2265 ) 2266 ); 2267} 2268 2269 2270def PatGenMode { 2271 int NoPattern = 0; 2272 int Pattern = 1; 2273} 2274 2275class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { 2276 2277 field list<ValueType> ArgVT = _ArgVT; 2278 field bit EnableClamp = _EnableClamp; 2279 field bit IsTrue16 = 0; 2280 field bit IsRealTrue16 = 0; 2281 field bit IsInvalidSingleUseConsumer = 0; 2282 field bit IsInvalidSingleUseProducer = 0; 2283 2284 field ValueType DstVT = ArgVT[0]; 2285 field ValueType Src0VT = ArgVT[1]; 2286 field ValueType Src1VT = ArgVT[2]; 2287 field ValueType Src2VT = ArgVT[3]; 2288 field RegisterOperand DstRC = getVALUDstForVT<DstVT>.ret; 2289 field RegisterOperand DstRCDPP = DstRC; 2290 field RegisterOperand DstRC64 = DstRC; 2291 field RegisterOperand DstRCVOP3DPP = DstRC64; 2292 field RegisterOperand DstRCSDWA = getSDWADstForVT<DstVT>.ret; 2293 field RegisterOperand Src0RC32 = getVOPSrc0ForVT<Src0VT, IsTrue16>.ret; 2294 field RegisterOperand Src1RC32 = getVregSrcForVT<Src1VT>.ret; 2295 field RegisterOperand Src0RC64 = getVOP3SrcForVT<Src0VT>.ret; 2296 field RegisterOperand Src1RC64 = getVOP3SrcForVT<Src1VT>.ret; 2297 field RegisterOperand Src2RC64 = getVOP3SrcForVT<Src2VT>.ret; 2298 field RegisterOperand Src0DPP = getVregSrcForVT<Src0VT>.ret; 2299 field RegisterOperand Src1DPP = getVregSrcForVT<Src1VT>.ret; 2300 field RegisterOperand Src2DPP = getVregSrcForVT<Src2VT>.ret; 2301 field RegisterOperand Src0VOP3DPP = VGPRSrc_32; 2302 field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret; 2303 field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret; 2304 field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret; 2305 field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret; 2306 field Operand Src0Mod = getSrcMod<Src0VT>.ret; 2307 field Operand Src1Mod = getSrcMod<Src1VT>.ret; 2308 field Operand Src2Mod = getSrcMod<Src2VT>.ret; 2309 field Operand Src0ModDPP = getSrcModDPP<Src0VT>.ret; 2310 field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; 2311 field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; 2312 field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret; 2313 field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret; 2314 field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret; 2315 field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; 2316 field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; 2317 2318 2319 field bit IsMAI = 0; 2320 field bit IsVOP3P = 0; 2321 field bit IsDOT = 0; 2322 field bit IsSingle = 0; 2323 field bit IsWMMA = 0; 2324 field bit IsSWMMAC = 0; 2325 2326 field bit IsFP8SrcByteSel = 0; 2327 field bit IsFP8DstByteSel = 0; 2328 field bit IsFP8ByteSel = !or(IsFP8SrcByteSel, IsFP8DstByteSel); 2329 2330 field bit HasDst = !ne(DstVT.Value, untyped.Value); 2331 field bit HasDst32 = HasDst; 2332 field bit EmitDst = HasDst; // force 
dst encoding, see v_movreld_b32 special case 2333 field bit EmitDstSel = EmitDst; 2334 field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret; 2335 field bit HasSrc0 = !ne(Src0VT.Value, untyped.Value); 2336 field bit HasSrc1 = !ne(Src1VT.Value, untyped.Value); 2337 field bit HasSrc2 = !ne(Src2VT.Value, untyped.Value); 2338 2339 field bit HasSrc0FloatMods = Src0VT.isFP; 2340 field bit HasSrc1FloatMods = Src1VT.isFP; 2341 field bit HasSrc2FloatMods = Src2VT.isFP; 2342 2343 field bit HasSrc0IntMods = isIntType<Src0VT>.ret; 2344 field bit HasSrc1IntMods = isIntType<Src1VT>.ret; 2345 field bit HasSrc2IntMods = isIntType<Src2VT>.ret; 2346 2347 field bit HasClamp = !or(isModifierType<Src0VT>.ret, EnableClamp); 2348 field bit HasSDWAClamp = EmitDst; 2349 field bit HasFPClamp = !and(DstVT.isFP, HasClamp); 2350 field bit HasIntClamp = !if(DstVT.isFP, 0, HasClamp); 2351 field bit HasClampLo = HasClamp; 2352 field bit HasClampHi = !and(DstVT.isVector, HasClamp); 2353 field bit HasHigh = 0; 2354 2355 field bit IsPacked = Src0VT.isVector; 2356 field bit HasOpSel = IsPacked; 2357 field bit HasOMod = !if(IsVOP3P, 0, DstVT.isFP); 2358 field bit HasSDWAOMod = DstVT.isFP; 2359 2360 field bit HasModifiers = !or(isModifierType<Src0VT>.ret, 2361 isModifierType<Src1VT>.ret, 2362 isModifierType<Src2VT>.ret, 2363 HasOMod); 2364 2365 field bit HasSrc0Mods = HasModifiers; 2366 field bit HasSrc1Mods = !if(HasModifiers, !or(HasSrc1FloatMods, HasSrc1IntMods), 0); 2367 field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0); 2368 2369 field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2370 field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret; 2371 field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP); 2372 field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2373 field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2374 field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret; 2375 field bit HasExtSDWA9 = HasExtSDWA; 2376 field int NeedPatGen = PatGenMode.NoPattern; 2377 2378 field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); 2379 field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); 2380 field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods); 2381 2382 field dag Outs = !if(HasDst,(outs DstRC:$vdst),(outs)); 2383 2384 // VOP3b instructions are a special case with a second explicit 2385 // output. This is manually overridden for them. 
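// (For example, carry-out instructions such as v_add_co_u32_e64 write a carry
// bit to an explicit $sdst in addition to the $vdst result.)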
2386 field dag Outs32 = Outs; 2387 field dag Outs64 = !if(HasDst,(outs DstRC64:$vdst),(outs)); 2388 field dag OutsDPP = getOutsDPP<HasDst, DstVT, DstRCDPP>.ret; 2389 field dag OutsDPP8 = OutsDPP; 2390 field dag OutsVOP3DPP = getOutsDPP<HasDst, DstVT, DstRCVOP3DPP>.ret; 2391 field dag OutsVOP3DPP8 = OutsVOP3DPP; 2392 field dag OutsSDWA = getOutsSDWA<HasDst, DstVT, DstRCSDWA>.ret; 2393 2394 field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret; 2395 field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs, 2396 HasClamp, HasModifiers, HasSrc2Mods, 2397 HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; 2398 field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64, 2399 NumSrcArgs, HasClamp, HasOpSel, 2400 Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret; 2401 field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, 2402 NumSrcArgs, HasClamp, HasOMod, 2403 getOpSelMod<Src0VT>.ret, 2404 getOpSelMod<Src1VT>.ret, 2405 getOpSelMod<Src2VT>.ret>.ret; 2406 field dag InsDPP = !if(HasExtDPP, 2407 getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2408 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret, 2409 (ins)); 2410 field dag InsDPP16 = getInsDPP16<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, 2411 HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2412 field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, 2413 NumSrcArgs, HasModifiers, 2414 Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; 2415 defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, 2416 Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, 2417 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; 2418 defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, 2419 Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, 2420 Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret; 2421 2422 field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase); 2423 2424 field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2425 field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2426 field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; 2427 field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, 2428 HasSDWAOMod, Src0ModSDWA, Src1ModSDWA, 2429 DstVT>.ret; 2430 field dag InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X); 2431 // It is a slight misnomer to use the deferred f32 operand type for non-float 2432 // operands, but this operand type will only be used if the other dual 2433 // component is FMAAK or FMAMK 2434 field dag InsVOPDXDeferred = (ins !if(!eq(Src0VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0X, VGPR_32:$vsrc1X); 2435 field dag InsVOPDY = (ins Src0RC32:$src0Y, Src1RC32:$vsrc1Y); 2436 field dag InsVOPDYDeferred = (ins !if(!eq(Src1VT.Size, 32), VSrc_f32_Deferred, VSrc_f16_Deferred):$src0Y, VGPR_32:$vsrc1Y); 2437 2438 2439 field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret; 2440 field string AsmDPP = !if(HasExtDPP, 2441 getAsmDPP<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret, ""); 2442 field string AsmDPP16 = getAsmDPP16<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret; 2443 // DPP8 encoding has no fields for modifiers, and it is enforced by setting 2444 // the asm operand name via this HasModifiers flag 2445 field string AsmDPP8 = getAsmDPP8<HasDst, NumSrcArgs, 0 /*HasModifiers*/, DstVT>.ret; 2446 field string AsmVOP3Base = getAsmVOP3Base<NumSrcArgs, HasDst, HasClamp, 2447 HasOpSel, HasOMod, IsVOP3P, HasModifiers, HasModifiers, 
HasModifiers, 2448 HasModifiers, DstVT, IsFP8ByteSel>.ret; 2449 field string Asm64 = AsmVOP3Base; 2450 field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret; 2451 field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, 2452 HasClamp, 2453 HasOMod, 2454 HasSrc0FloatMods, 2455 HasSrc1FloatMods, 2456 HasSrc2FloatMods>.ret; 2457 field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret; 2458 field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret; 2459 field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret; 2460 field string AsmSDWA = getAsmSDWA<HasDst, NumSrcArgs, DstVT>.ret; 2461 field string AsmSDWA9 = getAsmSDWA9<HasDst, HasSDWAOMod, NumSrcArgs, DstVT>.ret; 2462 field string AsmVOPDX = getAsmVOPDPart<NumSrcArgs, "X">.ret; 2463 field string AsmVOPDY = getAsmVOPDPart<NumSrcArgs, "Y">.ret; 2464 field string TieRegDPP = "$old"; 2465} 2466 2467 class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> { 2468 let HasExt = 0; 2469 let HasExtDPP = 0; 2470 let HasExtVOP3DPP = 0; 2471 let HasExt32BitDPP = 0; 2472 let HasExt64BitDPP = 0; 2473 let HasExtSDWA = 0; 2474 let HasExtSDWA9 = 0; 2475} 2476 2477class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> { 2478 let NeedPatGen = mode; 2479} 2480 2481// VOPC_Profile_t16, VOPC_NoSdst_Profile_t16, VOPC_Class_Profile_t16, 2482// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this 2483// class, so copy changes to this class in those profiles 2484class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> { 2485 let IsTrue16 = 1; 2486 let IsRealTrue16 = 1; 2487 2488 let HasOpSel = 1; 2489 let HasModifiers = 1; // All instructions at least have OpSel. 2490 2491 // Most DstVT are 16-bit, but not all. 2492 let DstRC = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 0 /*IsVOP3Encoding*/>.ret; 2493 let DstRC64 = getVALUDstForVT<DstVT>.ret; 2494 let Src0RC32 = getVOPSrc0ForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2495 let Src1RC32 = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2496 let Src0DPP = getVregSrcForVT<Src0VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2497 let Src1DPP = getVregSrcForVT<Src1VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2498 let Src2DPP = getVregSrcForVT<Src2VT, 1 /*IsTrue16*/, 0 /*IsFake16*/>.ret; 2499 let Src0ModDPP = getSrcModDPP_t16<Src0VT, 0 /*IsFake16*/>.ret; 2500 let Src1ModDPP = getSrcModDPP_t16<Src1VT, 0 /*IsFake16*/>.ret; 2501 let Src2ModDPP = getSrcModDPP_t16<Src2VT, 0 /*IsFake16*/>.ret; 2502 let Src0VOP3DPP = VGPRSrc_16; 2503 let Src0ModVOP3DPP = getSrcModVOP3DPP<Src0VT, 0 /*IsFake16*/>.ret; 2504 let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 0 /*IsFake16*/>.ret; 2505 let Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT, 0 /*IsFake16*/>.ret; 2506 2507 let DstRC64 = getVALUDstForVT<DstVT, 1 /*IsTrue16*/, 1 /*IsVOP3Encoding*/>.ret; 2508 let Src0RC64 = getVOP3SrcForVT<Src0VT, 1 /*IsTrue16*/>.ret; 2509 let Src1RC64 = getVOP3SrcForVT<Src1VT, 1 /*IsTrue16*/>.ret; 2510 let Src2RC64 = getVOP3SrcForVT<Src2VT, 1 /*IsTrue16*/>.ret; 2511 let Src0Mod = getSrcMod<Src0VT, 1 /*IsTrue16*/>.ret; 2512 let Src1Mod = getSrcMod<Src1VT, 1 /*IsTrue16*/>.ret; 2513 let Src2Mod = getSrcMod<Src2VT, 1 /*IsTrue16*/>.ret; 2514} 2515 2516class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> { 2517 let IsTrue16 = 1; 2518 // Most DstVT are 16-bit, but not all 2519 let DstRC = getVALUDstForVT_fake16<DstVT>.ret; 2520 let DstRC64 = getVALUDstForVT<DstVT>.ret; 2521 let Src1RC32 = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2522 let Src0DPP = 
getVregSrcForVT<Src0VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2523 let Src1DPP = getVregSrcForVT<Src1VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2524 let Src2DPP = getVregSrcForVT<Src2VT, 1/*IsTrue16*/, 1/*IsFake16*/>.ret; 2525 let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret; 2526 let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret; 2527 let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret; 2528} 2529 2530def VOP_F16_F16 : VOPProfile<[f16, f16, untyped, untyped]>; 2531def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>; 2532def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>; 2533def VOP_I16_I16 : VOPProfile <[i16, i16, untyped, untyped]>; 2534 2535def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; 2536def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>; 2537def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>; 2538def VOP_I16_I16_I16 : VOPProfile <[i16, i16, i16, untyped]>; 2539def VOP_I16_I16_I16_ARITH : VOPProfile <[i16, i16, i16, untyped], /*EnableClamp=*/1>; 2540 2541def VOP_I16_I16_I16_I16 : VOPProfile <[i16, i16, i16, i16, untyped]>; 2542def VOP_F16_F16_F16_F16 : VOPProfile <[f16, f16, f16, f16, untyped]>; 2543 2544def VOP_I32_I16_I16_I32 : VOPProfile <[i32, i16, i16, i32, untyped]>; 2545def VOP_I32_I16 : VOPProfile <[i32, i16, untyped, untyped]>; 2546def VOP_I16_I32 : VOPProfile <[i16, i32, untyped, untyped]>; 2547 2548def VOP_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, untyped]>; 2549def VOP_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, untyped]>; 2550def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>; 2551 2552def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>; 2553def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>; 2554def VOP_V2I16_F32_F32 : VOPProfile <[v2i16, f32, f32, untyped]>; 2555def VOP_V2I16_I32_I32 : VOPProfile <[v2i16, i32, i32, untyped]>; 2556 2557def VOP_F16_V2F16_V2F16_F16 : VOPProfile <[f16, v2f16, v2f16, f16]>; 2558def VOP_BF16_V2BF16_V2BF16_BF16: VOPProfile <[bf16, v2bf16, v2bf16, bf16]>; 2559def VOP_F32_V2BF16_V2BF16_F32 : VOPProfile <[f32, v2bf16, v2bf16, f32]>; 2560 2561def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>; 2562 2563def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; 2564 2565def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; 2566def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; 2567def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; 2568def VOP_F64_F32 : VOPProfile <[f64, f32, untyped, untyped]>; 2569def VOP_F64_F64 : VOPProfile <[f64, f64, untyped, untyped]>; 2570def VOP_F64_I32 : VOPProfile <[f64, i32, untyped, untyped]>; 2571def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>; 2572def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>; 2573def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>; 2574def VOP_F16_F32 : VOPProfile <[f16, f32, untyped, untyped]>; 2575def VOP_F32_F16 : VOPProfile <[f32, f16, untyped, untyped]>; 2576def VOP_I64_I64 : VOPProfile <[i64, i64, untyped, untyped]>; 2577 2578def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>; 2579def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>; 2580def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>; 2581def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>; 2582def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; 2583def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; 2584def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; 2585def VOP_I32_I32_I32 : 
VOPProfile <[i32, i32, i32, untyped]>; 2586def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=*/1>; 2587def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>; 2588def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>; 2589 2590def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; 2591def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; 2592def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; 2593 2594def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>; 2595def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>; 2596def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>; 2597def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; 2598def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; 2599def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; 2600def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>; 2601def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>; 2602def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>; 2603 2604def VOP_F32_V2F16_V2F16_F32 : VOPProfile <[f32, v2f16, v2f16, f32]>; 2605def VOP_I32_V2I16_V2I16_I32 : VOPProfile <[i32, v2i16, v2i16, i32]>; 2606 2607def VOP_V4F32_F32_F32_V4F32 : VOPProfile <[v4f32, f32, f32, v4f32]>; 2608def VOP_V16F32_F32_F32_V16F32 : VOPProfile <[v16f32, f32, f32, v16f32]>; 2609def VOP_V32F32_F32_F32_V32F32 : VOPProfile <[v32f32, f32, f32, v32f32]>; 2610def VOP_V4F32_V4F16_V4F16_V4F32 : VOPProfile <[v4f32, v4f16, v4f16, v4f32]>; 2611def VOP_V16F32_V4F16_V4F16_V16F32 : VOPProfile <[v16f32, v4f16, v4f16, v16f32]>; 2612def VOP_V32F32_V4F16_V4F16_V32F32 : VOPProfile <[v32f32, v4f16, v4f16, v32f32]>; 2613def VOP_V4F32_V2I16_V2I16_V4F32 : VOPProfile <[v4f32, v2i16, v2i16, v4f32]>; 2614def VOP_V16F32_V2I16_V2I16_V16F32 : VOPProfile <[v16f32, v2i16, v2i16, v16f32]>; 2615def VOP_V32F32_V2I16_V2I16_V32F32 : VOPProfile <[v32f32, v2i16, v2i16, v32f32]>; 2616def VOP_V4I32_I32_I32_V4I32 : VOPProfile <[v4i32, i32, i32, v4i32]>; 2617def VOP_V16I32_I32_I32_V16I32 : VOPProfile <[v16i32, i32, i32, v16i32]>; 2618def VOP_V32I32_I32_I32_V32I32 : VOPProfile <[v32i32, i32, i32, v32i32]>; 2619 2620def VOP_V4F64_F64_F64_V4F64 : VOPProfile <[v4f64, f64, f64, v4f64]>; 2621def VOP_V1F64_F64_F64_V1F64 : VOPProfile <[v1f64, f64, f64, v1f64]>; 2622 2623def VOP_V2F32_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, v2f32]>; 2624def VOP_V2F32_V2F32_V2F32 : VOPProfile <[v2f32, v2f32, v2f32, untyped]>; 2625def VOP_V2I32_V2I32_V2I32 : VOPProfile <[v2i32, v2i32, v2i32, untyped]>; 2626def VOP_V4F32_V4I16_V4I16_V4F32 : VOPProfile <[v4f32, v4i16, v4i16, v4f32]>; 2627def VOP_V16F32_V4I16_V4I16_V16F32 : VOPProfile <[v16f32, v4i16, v4i16, v16f32]>; 2628def VOP_V32F32_V4I16_V4I16_V32F32 : VOPProfile <[v32f32, v4i16, v4i16, v32f32]>; 2629 2630def VOP_V4I32_I64_I64_V4I32 : VOPProfile <[v4i32, i64, i64, v4i32]>; 2631def VOP_V16I32_I64_I64_V16I32 : VOPProfile <[v16i32, i64, i64, v16i32]>; 2632def VOP_V4F32_V2F32_V2F32_V4F32 : VOPProfile <[v4f32, v2f32, v2f32, v4f32]>; 2633def VOP_V16F32_V2F32_V2F32_V16F32 : VOPProfile <[v16f32, v2f32, v2f32, v16f32]>; 2634def VOP_V4F32_I64_I64_V4F32 : VOPProfile <[v4f32, i64, i64, v4f32]>; 2635def VOP_V16F32_I64_I64_V16F32 : VOPProfile <[v16f32, i64, i64, v16f32]>; 2636 2637def VOP_V4F32_V4F16_V8F16_I32 : VOPProfile <[v4f32, v4f16, v8f16, i32]>; 2638def VOP_V16F32_V4F16_V8F16_I32 : VOPProfile <[v16f32, v4f16, v8f16, i32]>; 2639def VOP_V4F32_V4I16_V8I16_I32 : VOPProfile <[v4f32, v4i16, v8i16, i32]>; 2640def 
VOP_V16F32_V4I16_V8I16_I32 : VOPProfile <[v16f32, v4i16, v8i16, i32]>; 2641def VOP_V4I32_V2I32_V4I32_I32 : VOPProfile <[v4i32, v2i32, v4i32, i32]>; 2642def VOP_V16I32_V2I32_V4I32_I32 : VOPProfile <[v16i32, v2i32, v4i32, i32]>; 2643def VOP_V4F32_V2I32_V4I32_I32 : VOPProfile <[v4f32, v2i32, v4i32, i32]>; 2644def VOP_V16F32_V2I32_V4I32_I32 : VOPProfile <[v16f32, v2i32, v4i32, i32]>; 2645 2646class Commutable_REV <string revOp, bit isOrig> { 2647 string RevOp = revOp; 2648 bit IsOrig = isOrig; 2649} 2650 2651//===----------------------------------------------------------------------===// 2652// Interpolation opcodes 2653//===----------------------------------------------------------------------===// 2654 2655class VINTRPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVINTRPDst">; 2656 2657class VINTRP_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> : 2658 VINTRPCommon <outs, ins, "", pattern>, 2659 SIMCInstr<opName, SIEncodingFamily.NONE> { 2660 let isPseudo = 1; 2661 let isCodeGenOnly = 1; 2662} 2663 2664// FIXME-GFX10: WIP. 2665class VINTRP_Real_si <bits <2> op, string opName, dag outs, dag ins, 2666 string asm, int encodingFamily> : 2667 VINTRPCommon <outs, ins, asm, []>, 2668 VINTRPe <op>, 2669 SIMCInstr<opName, encodingFamily> { 2670} 2671 2672class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins, 2673 string asm> : 2674 VINTRPCommon <outs, ins, asm, []>, 2675 VINTRPe_vi <op>, 2676 SIMCInstr<opName, SIEncodingFamily.VI> { 2677 let AssemblerPredicate = isGFX8GFX9; 2678 let DecoderNamespace = "GFX8"; 2679} 2680 2681// FIXME-GFX10: WIP. 2682multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm, 2683 list<dag> pattern = []> { 2684 def "" : VINTRP_Pseudo <NAME, outs, ins, pattern>; 2685 2686 let AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" in { 2687 def _si : VINTRP_Real_si <op, NAME, outs, ins, asm, SIEncodingFamily.SI>; 2688 } // End AssemblerPredicate = isGFX6GFX7, DecoderNamespace = "GFX6GFX7" 2689 2690 def _vi : VINTRP_Real_vi <op, NAME, outs, ins, asm>; 2691 2692 let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { 2693 def _gfx10 : VINTRP_Real_si<op, NAME, outs, ins, asm, SIEncodingFamily.GFX10>; 2694 } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" 2695} 2696 2697//===----------------------------------------------------------------------===// 2698// Vector instruction mappings 2699//===----------------------------------------------------------------------===// 2700 2701// Maps an opcode in e32 form to its e64 equivalent 2702def getVOPe64 : InstrMapping { 2703 let FilterClass = "VOP"; 2704 let RowFields = ["OpName"]; 2705 let ColFields = ["Size", "VOP3"]; 2706 let KeyCol = ["4", "0"]; 2707 let ValueCols = [["8", "1"]]; 2708} 2709 2710// Maps an opcode in e64 form to its e32 equivalent 2711def getVOPe32 : InstrMapping { 2712 let FilterClass = "VOP"; 2713 let RowFields = ["OpName"]; 2714 let ColFields = ["Size", "VOP3"]; 2715 let KeyCol = ["8", "1"]; 2716 let ValueCols = [["4", "0"]]; 2717} 2718 2719// Maps ordinary instructions to their SDWA counterparts 2720def getSDWAOp : InstrMapping { 2721 let FilterClass = "VOP"; 2722 let RowFields = ["OpName"]; 2723 let ColFields = ["AsmVariantName"]; 2724 let KeyCol = ["Default"]; 2725 let ValueCols = [["SDWA"]]; 2726} 2727 2728// Maps SDWA instructions to their ordinary counterparts 2729def getBasicFromSDWAOp : InstrMapping { 2730 let FilterClass = "VOP"; 2731 let RowFields = ["OpName"]; 2732 let ColFields = ["AsmVariantName"]; 2733 let KeyCol 
= ["SDWA"];
  let ValueCols = [["Default"]];
}

// Maps ordinary instructions to their DPP counterparts
def getDPPOp32 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["Default"];
  let ValueCols = [["DPP"]];
}

def getDPPOp64 : InstrMapping {
  let FilterClass = "VOP";
  let RowFields = ["OpName"];
  let ColFields = ["AsmVariantName"];
  let KeyCol = ["VOP3"];
  let ValueCols = [["VOP3_DPP"]];
}

// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
  let FilterClass = "Commutable_REV";
  let RowFields = ["RevOp"];
  let ColFields = ["IsOrig"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

def getMCOpcodeGen : InstrMapping {
  let FilterClass = "SIMCInstr";
  let RowFields = ["PseudoInstr"];
  let ColFields = ["Subtarget"];
  let KeyCol = [!cast<string>(SIEncodingFamily.NONE)];
  // These columns must be kept in sync with the SIEncodingFamily enumeration.
  let ValueCols = [[!cast<string>(SIEncodingFamily.SI)],
                   [!cast<string>(SIEncodingFamily.VI)],
                   [!cast<string>(SIEncodingFamily.SDWA)],
                   [!cast<string>(SIEncodingFamily.SDWA9)],
                   // GFX80 encoding is added to work around a multiple matching
                   // issue for buffer instructions with unpacked d16 data. This
                   // does not actually change the encoding, and thus may be
                   // removed later.
                   [!cast<string>(SIEncodingFamily.GFX80)],
                   [!cast<string>(SIEncodingFamily.GFX9)],
                   [!cast<string>(SIEncodingFamily.GFX10)],
                   [!cast<string>(SIEncodingFamily.SDWA10)],
                   [!cast<string>(SIEncodingFamily.GFX90A)],
                   [!cast<string>(SIEncodingFamily.GFX940)],
                   [!cast<string>(SIEncodingFamily.GFX11)],
                   [!cast<string>(SIEncodingFamily.GFX12)]];
}

// Get equivalent SOPK instruction.
def getSOPKOp : InstrMapping {
  let FilterClass = "SOPKInstTable";
  let RowFields = ["BaseCmpOp"];
  let ColFields = ["IsSOPK"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def getIfAddr64Inst : InstrMapping {
  let FilterClass = "MUBUFAddr64Table";
  let RowFields = ["OpName"];
  let ColFields = ["IsAddr64"];
  let KeyCol = ["1"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL to its SADDR form.
def getGlobalSaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps a GLOBAL SADDR to its VADDR form.
def getGlobalVaddrOp : InstrMapping {
  let FilterClass = "GlobalSaddrTable";
  let RowFields = ["SaddrOp"];
  let ColFields = ["IsSaddr"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmpx opcode with sdst to opcode without sdst.
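// (On GFX10+ the v_cmpx encodings write EXEC implicitly and omit the explicit
// sdst operand, which is the form this mapping selects.)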
def getVCMPXNoSDstOp : InstrMapping {
  let FilterClass = "VCMPXNoSDstTable";
  let RowFields = ["NoSDstOp"];
  let ColFields = ["HasSDst"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a SOPP to a SOPP with S_NOP
def getSOPPWithRelaxation : InstrMapping {
  let FilterClass = "SOPPRelaxTable";
  let RowFields = ["KeyName"];
  let ColFields = ["IsRelaxed"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

// Maps flat scratch opcodes between addressing modes
def getFlatScratchInstSTfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["ST"]];
}

def getFlatScratchInstSSfromSV : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SV"];
  let ValueCols = [["SS"]];
}

def getFlatScratchInstSVfromSVS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SVS"];
  let ValueCols = [["SV"]];
}

def getFlatScratchInstSVfromSS : InstrMapping {
  let FilterClass = "FlatScratchInst";
  let RowFields = ["SVOp"];
  let ColFields = ["Mode"];
  let KeyCol = ["SS"];
  let ValueCols = [["SV"]];
}

def getMFMAEarlyClobberOp : InstrMapping {
  let FilterClass = "MFMATable";
  let RowFields = ["FMAOp"];
  let ColFields = ["IsMac"];
  let KeyCol = ["1"];
  let ValueCols = [["0"]];
}

// Maps a v_cmp instruction to its v_cmpx equivalent.
def getVCMPXOpFromVCMP : InstrMapping {
  let FilterClass = "VCMPVCMPXTable";
  let RowFields = ["VCMPOp"];
  let ColFields = ["IsVCMPX"];
  let KeyCol = ["0"];
  let ValueCols = [["1"]];
}

def VOPDComponentTable : GenericTable {
  let FilterClass = "VOPD_Component";
  let CppTypeName = "VOPDComponentInfo";
  let Fields = ["BaseVOP", "VOPDOp", "CanBeVOPDX"];
  let PrimaryKey = ["BaseVOP"];
  let PrimaryKeyName = "getVOPDComponentHelper";
}

def getVOPDBaseFromComponent : SearchIndex {
  let Table = VOPDComponentTable;
  let Key = ["VOPDOp"];
}

def VOPDPairs : GenericTable {
  let FilterClass = "VOPD_Base";
  let CppTypeName = "VOPDInfo";
  let Fields = ["Opcode", "OpX", "OpY", "SubTgt"];
  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getVOPDOpcodeHelper";
}

def getVOPDInfoFromComponentOpcodes : SearchIndex {
  let Table = VOPDPairs;
  let Key = ["OpX", "OpY", "SubTgt"];
}

include "SIInstructions.td"

include "DSInstructions.td"
include "MIMGInstructions.td"